def setup_method(self):
    """Prepare the borough frame and the expected merged frames.

    ``self.polydf`` keeps three columns of the ``nybb`` dataset, uses
    ``myshapes`` as its geometry column and carries a ``manhattan_bronx``
    group code (6 for the rows labelled 3..4, 5 for every other row).
    ``self.first`` and ``self.mean`` are the expected frames, indexed by
    that group code.
    """
    boroughs = read_file(geopandas.datasets.get_path('nybb'))
    boroughs = boroughs[['geometry', 'BoroName', 'BoroCode']]
    boroughs = boroughs.rename(columns={'geometry': 'myshapes'})
    self.polydf = boroughs.set_geometry('myshapes')

    # Group code: default 5, rows labelled 3..4 are switched to 6.
    self.polydf['manhattan_bronx'] = 5
    self.polydf.loc[3:4, 'manhattan_bronx'] = 6

    # Merged geometry: one union per group code, in index order (5, 6).
    group_five = self.polydf.loc[0:2]
    group_six = self.polydf.loc[3:4]
    merged_shapes = GeoDataFrame(
        {'myshapes': [group_five.geometry.unary_union,
                      group_six.geometry.unary_union]},
        geometry='myshapes',
        index=pd.Index([5, 6], name='manhattan_bronx'),
    )

    # Different expected results
    expected_first = merged_shapes.copy()
    expected_first['BoroName'] = ['Staten Island', 'Manhattan']
    expected_first['BoroCode'] = [5, 1]
    self.first = expected_first

    expected_mean = merged_shapes.copy()
    expected_mean['BoroCode'] = [4, 1.5]
    self.mean = expected_mean
def calc_pandas(self):
    """Return the features lying within ``self.distance`` of a point.

    Only the first row of the second input is used as the reference point.
    If the EPSG code of the first input does not use a unit whose name
    starts with ``met``, both inputs are read in EPSG:3857 for the distance
    computation and the result is reprojected to the original EPSG code
    before it is returned.

    Returns:
        A copy of the matching features with an added ``distance`` column,
        sorted by ascending distance.

    Raises:
        IndexError: if the second input has no rows.
    """
    features = self.inputs[0]
    original_projection = features.get_epsg()
    epsg = original_projection

    srs = osr.SpatialReference()
    srs.ImportFromEPSG(int(original_projection))
    if not srs.GetAttrValue('UNIT').lower().startswith('met'):
        epsg = 3857
    else:
        # Already in a metre-based projection: nothing to restore later.
        original_projection = None

    features_df = features.read(epsg=epsg)
    point_df = self.inputs[1].read(epsg=epsg)[:1]
    # Positional access: the label of the first row is not guaranteed to
    # be 0, so a label lookup (``point_gs[0]``) could raise KeyError.
    point = point_df.geometry.iloc[0]

    nearby_df = features_df.copy()
    nearby_df['distance'] = np.array(
        [feature.distance(point) for feature in features_df.geometry],
        dtype=float)
    nearby_df = nearby_df[nearby_df['distance'] <= self.distance]
    nearby_df = nearby_df.sort_values('distance')

    if original_projection:
        # Reproject the whole frame so the geometry column and the frame's
        # CRS metadata stay in agreement.
        nearby_df = nearby_df.to_crs(epsg=original_projection)

    return nearby_df
def calc_pandas(self):
    """Annotate each feature of the first input with its minimum distance
    to any feature of the second input.

    If the EPSG code of the first input does not use a unit whose name
    starts with ``met``, both inputs are read in EPSG:3857 for the distance
    computation and the result is reprojected to the original EPSG code
    before it is returned.

    Returns:
        A copy of the first input with an added ``distance`` column,
        sorted by ascending distance.

    Raises:
        ValueError: from ``np.min`` when the first input has features but
            the second input has none.
    """
    first = self.inputs[0]
    original_projection = first.get_epsg()
    epsg = original_projection

    srs = osr.SpatialReference()
    srs.ImportFromEPSG(int(original_projection))
    if not srs.GetAttrValue('UNIT').lower().startswith('met'):
        epsg = 3857
    else:
        # Already in a metre-based projection: nothing to restore later.
        original_projection = None

    first_df = first.read(epsg=epsg)
    second_df = self.inputs[1].read(epsg=epsg)
    # Materialise once; the inner loop below walks it for every feature.
    second_geoms = list(second_df.geometry)

    min_dist = np.array(
        [np.min([first_geom.distance(second_geom)
                 for second_geom in second_geoms])
         for first_geom in first_df.geometry],
        dtype=float)

    distance_df = first_df.copy()
    distance_df['distance'] = min_dist
    distance_df = distance_df.sort_values('distance')

    if original_projection:
        # Reproject the whole frame so the geometry column and the frame's
        # CRS metadata stay in agreement.
        distance_df = distance_df.to_crs(epsg=original_projection)

    return distance_df
def dfs(request):
    """Build two polygon frames plus the expected join results.

    Returns ``[request.param, df1, df2, expected]`` where ``expected`` maps
    the predicate names ``'intersects'``, ``'contains'`` and ``'within'``
    to the expected joined frame.  With the ``'string-index'`` parameter
    both frames get string index labels instead of the default index.
    """
    left_polygons = GeoSeries([
        Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
        Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]),
        Polygon([(6, 0), (9, 0), (9, 3), (6, 3)]),
    ])
    right_polygons = GeoSeries([
        Polygon([(1, 1), (4, 1), (4, 4), (1, 4)]),
        Polygon([(4, 4), (7, 4), (7, 7), (4, 7)]),
        Polygon([(7, 7), (10, 7), (10, 10), (7, 10)]),
    ])

    df1 = GeoDataFrame({'geometry': left_polygons, 'df1': [0, 1, 2]})
    df2 = GeoDataFrame({'geometry': right_polygons, 'df2': [3, 4, 5]})
    if request.param == 'string-index':
        df1.index = ['a', 'b', 'c']
        df2.index = ['d', 'e', 'f']

    def _expected_frame(right_keys, right_positions=None):
        # Pair rows of df1 and df2 through a throw-away '_merge' key; rows
        # whose key has no partner survive thanks to the outer merge.
        left = df1.copy().reset_index().rename(
            columns={'index': 'index_left'})
        right = df2.copy()
        if right_positions is not None:
            right = right.iloc[right_positions]
        right = right.reset_index().rename(
            columns={'index': 'index_right'})
        left['_merge'] = [0, 1, 2]
        right['_merge'] = right_keys
        joined = pd.merge(left, right, on='_merge', how='outer')
        return joined.drop('_merge', axis=1).copy()

    # construction expected frames
    expected = {
        'intersects': _expected_frame([0, 0, 1, 3],
                                      right_positions=[0, 1, 1, 2]),
        'contains': _expected_frame([0, 3, 3]),
        'within': _expected_frame([3, 1, 3]),
    }

    return [request.param, df1, df2, expected]
def calc_pandas(self):
    """Return the features of the first input that equal a feature of the
    second input.

    The second input is read with the EPSG code of the first input.  Two
    features match when the ``equals`` method of the first geometry returns
    a true value for the second geometry.

    Returns:
        A new frame holding the matching rows of the first input, with the
        same columns as the first input.
    """
    first, second = self.inputs[0], self.inputs[1]
    first_df = first.read()
    second_df = second.read(epsg=first.get_epsg())

    # Materialise once; it is scanned for every feature of the first input.
    second_geoms = list(second_df.geometry)

    # ``any`` stops at the first match instead of testing every pair.
    matches = np.array(
        [any(first_geom.equals(second_geom) for second_geom in second_geoms)
         for first_geom in first_df.geometry],
        dtype=bool)

    # Filtering with the mask directly avoids a temporary 'equals' column,
    # which would clobber an input column of that name, and avoids the
    # positional ``drop('equals', 1)`` form removed in pandas 2.0.
    return first_df[matches].copy()
def calc_pandas(self):
    """Add a ``length`` column holding each feature's geometry length.

    If the input's EPSG code does not use a unit whose name starts with
    ``met``, the features are read in EPSG:3857 for the measurement and
    then reprojected to the original EPSG code.
    """
    source = self.inputs[0]
    native_epsg = source.get_epsg()

    spatial_ref = osr.SpatialReference()
    spatial_ref.ImportFromEPSG(int(native_epsg))
    unit_name = spatial_ref.GetAttrValue('UNIT').lower()
    if unit_name.startswith('met'):
        work_epsg, restore_epsg = native_epsg, None
    else:
        work_epsg, restore_epsg = 3857, native_epsg

    measured = GeoDataFrame.copy(source.read(epsg=work_epsg))
    measured['length'] = measured.geometry.length

    if not restore_epsg:
        return measured

    # Put the geometries back into the input's own projection.
    geometry_column = measured.geometry.name
    measured[geometry_column] = measured.geometry.to_crs(epsg=restore_epsg)
    measured.crs = fiona.crs.from_epsg(restore_epsg)
    return measured
def calc_pandas(self):
    """Add an ``area`` column holding each feature's geometry area.

    If the input's EPSG code does not use a unit whose name starts with
    ``met``, the features are read in EPSG:3857 for the measurement and
    then reprojected to the original EPSG code.
    """
    source = self.inputs[0]
    native_epsg = source.get_epsg()

    spatial_ref = osr.SpatialReference()
    spatial_ref.ImportFromEPSG(int(native_epsg))
    unit_name = spatial_ref.GetAttrValue('UNIT').lower()
    if unit_name.startswith('met'):
        work_epsg, restore_epsg = native_epsg, None
    else:
        work_epsg, restore_epsg = 3857, native_epsg

    measured = GeoDataFrame.copy(source.read(epsg=work_epsg))
    measured['area'] = measured.geometry.area

    if not restore_epsg:
        return measured

    # Put the geometries back into the input's own projection.
    geometry_column = measured.geometry.name
    measured[geometry_column] = measured.geometry.to_crs(epsg=restore_epsg)
    measured.crs = fiona.crs.from_epsg(restore_epsg)
    return measured
def calc_pandas(self):
    """Return the features of the first input that equal a feature of the
    second input.

    The second input is read with the EPSG code of the first input.  Two
    features match when the ``equals`` method of the first geometry returns
    a true value for the second geometry.

    Returns:
        A new frame holding the matching rows of the first input, with the
        same columns as the first input.
    """
    first, second = self.inputs[0], self.inputs[1]
    first_df = first.read()
    second_df = second.read(epsg=first.get_epsg())

    # Materialise once; it is scanned for every feature of the first input.
    second_geoms = list(second_df.geometry)

    # ``any`` stops at the first match instead of testing every pair.
    matches = np.array(
        [any(first_geom.equals(second_geom) for second_geom in second_geoms)
         for first_geom in first_df.geometry],
        dtype=bool)

    # Filtering with the mask directly avoids a temporary 'equals' column,
    # which would clobber an input column of that name, and avoids the
    # positional ``drop('equals', 1)`` form removed in pandas 2.0.
    return first_df[matches].copy()
def test_copy(self):
    """``copy()`` must keep the CRS on arrays, series and frames."""
    geom_array = from_shapely(self.geoms, crs=27700)
    series = GeoSeries(self.geoms, crs=4326)
    frame = GeoDataFrame(series, geometry=geom_array, columns=["col1"])

    # Geometry array
    array_copy = geom_array.copy()
    assert array_copy.crs == geom_array.crs

    # GeoSeries and the array backing it
    series_copy = series.copy()
    assert series_copy.crs == series.crs
    assert series_copy.values.crs == series.values.crs

    # GeoDataFrame: frame level, active geometry, and the extra column
    frame_copy = frame.copy()
    assert frame_copy.crs == frame.crs
    assert frame_copy.geometry.crs == frame.geometry.crs
    assert frame_copy.geometry.values.crs == frame.geometry.values.crs
    assert frame_copy.col1.crs == frame.col1.crs
    assert frame_copy.col1.values.crs == frame.col1.values.crs
def average_year_seasons(
        self, hex_data: geopandas.GeoDataFrame) -> geopandas.GeoDataFrame:
    """Collapse duplicated column names into a single averaged column.

    Every column other than ``polyid`` and ``geometry`` is carried over.
    When a column name occurs more than once in ``hex_data``, the
    duplicates are replaced by their row-wise mean.

    :param hex_data: frame with ``polyid`` and ``geometry`` columns plus
        the value columns to collapse.  It is not modified.
    :return: a new frame with ``polyid``, ``geometry`` and one column per
        distinct value-column name, in the order ``self.unique`` yields
        them.
    """
    source = hex_data.copy()
    # Explicit copy: columns are added below, and assigning onto a slice
    # of ``source`` triggers pandas' SettingWithCopy warning.
    averaged = source[['polyid', 'geometry']].copy()

    skipped = {'geometry', 'polyid'}
    done = set()
    # Walk the names in the order ``self.unique`` returns them rather than
    # through a set, so the output column order is reproducible.
    for year_season in self.unique(source.columns):
        if year_season in skipped or year_season in done:
            continue
        done.add(year_season)

        values = source[year_season]
        if isinstance(values, pandas.Series):
            # The name occurs once: nothing to average.
            averaged[year_season] = values
        else:
            # Duplicated name: ``values`` is a frame with one column per
            # duplicate.  The row-wise mean keeps the original index, so
            # the assignment aligns even for a non-default index.
            averaged[year_season] = values.mean(axis=1)

    print(averaged)
    return averaged
def seasonal_variance(
        self, hex_data: geopandas.GeoDataFrame) -> geopandas.GeoDataFrame:
    """Compute, per season, the row-wise variance of its ``*_yoy`` columns.

    The season of a value column is the second ``_``-separated token of
    its name.  For each season, the columns whose name ends with
    ``<season>_yoy`` are combined into a ``<season>_var`` column.

    :param hex_data: frame with ``polyid`` and ``geometry`` columns plus
        the value columns.  It is not modified.
    :return: a new frame with ``polyid``, ``geometry`` and one
        ``<season>_var`` column per season.
    :raises IndexError: if a value column name contains no ``_``.
    """
    source = hex_data.copy()
    # Explicit copy: columns are added below, and assigning onto a slice
    # of ``source`` triggers pandas' SettingWithCopy warning.
    result = source[['polyid', 'geometry']].copy()

    # De-duplicate while keeping the frame's column order; a plain set
    # would make the order of the output columns vary between runs.
    value_columns = [
        name for name in dict.fromkeys(source.columns)
        if name not in ('geometry', 'polyid')
    ]

    seasons = self.unique([name.split("_")[1] for name in value_columns])
    for season in seasons:
        suffix = '{}_yoy'.format(season)
        relevant_fields = [
            name for name in value_columns if name.endswith(suffix)
        ]
        result['{}_var'.format(season)] = source[relevant_fields].var(axis=1)

    print(result)
    return result
def _df_to_boundaries(df: pd.DataFrame,
                      boundaries: GeoDataFrame,
                      aggfunc=np.sum):
    """
    Aggregates point data to the corresponding polygon boundaries

    Parameters
    ----------
    df : pd.DataFrame of lat/long data to be aggregated, or GeoDataFrame
        with valid point geometry
    boundaries : GeoDataFrame with a 'geometry' column, or GeoSeries, of
        polygon geometry. Its index must be unique and must not be a
        MultiIndex. The caller's object is not modified.
    aggfunc : function, str, list or dict to aggregate numeric cols to
        polygon as per pd.DataFrame.agg(aggfunc)

    Returns
    -------
    PorygonDataFrame of the dataframe aggregated to polygon, with index
    'id' of the boundaries's 'id' index
    """
    # Validate df
    df = _validate_point_data(df)
    if not isinstance(df, GeoDataFrame):
        df = df_to_gpdf(df)

    # Validate boundaries
    assert boundaries.index.is_unique, 'PorygonDataFrame requires a unique index'
    assert not isinstance(
        boundaries.index,
        pd.MultiIndex), 'PorygonDataFrame does not support MultiIndex'
    if boundaries.index.name != 'id':
        logging.warning('Renaming boundary index from %s to "id"',
                        boundaries.index.name)
        # rename_axis returns a new object, so the index of the caller's
        # boundaries keeps its original name.
        boundaries = boundaries.rename_axis('id')

    if isinstance(boundaries, GeoSeries):
        srs = boundaries.copy()
    else:
        srs = boundaries['geometry']

    df = _assign_polygon_index(df, srs)
    df = df.drop(columns='geometry').groupby('id').agg(aggfunc).reset_index()
    # NOTE(review): `df` was already reset above, so this second
    # reset_index adds an 'index' column to the merged result. Kept as-is
    # because callers may rely on the current columns - confirm whether it
    # is intended.
    gpdf = pd.merge(df.reset_index(), boundaries, on='id')
    return PorygonDataFrame(gpdf.set_index('id'))
def get_edges_within_dist(graph_edges: GeoDataFrame,
                          coord: np.ndarray,
                          dist_retain: float) -> GeoDataFrame:
    """
    Given a point returns all edges that fall within a radius of dist.

    :param graph_edges: gdf of edges with columns [u, v, k, geometry];
        it is not modified
    :param coord: central point
    :param dist_retain: metres, retain radius (compared directly against
        the distances between the point and the edge geometries)
    :return: gdf of edges with columns [u, v, k, geometry, distance_to_obs]
        all with distance_to_obs < dist_retain
    """
    # Build the observation point once instead of once per edge.
    obs_point = Point(tuple(coord))

    graph_edges_dist = graph_edges.copy()
    graph_edges_dist['distance_to_obs'] = graph_edges['geometry'].apply(
        obs_point.distance)

    return graph_edges_dist[graph_edges_dist['distance_to_obs'] < dist_retain]
def reunion(
    no_bldg: gpd.GeoDataFrame,
    has_bldg: gpd.GeoDataFrame,
    bldgs_df: gpd.GeoDataFrame,
) -> gpd.GeoDataFrame:
    """
    Attach every orphaned parcel of no_bldg to a parent parcel of has_bldg
    and dissolve the parcels that share a uID into one row each.

    The parent uID of each orphan comes from find_parent_parcel_id.
    The input frames are not modified.
    """
    orphans = no_bldg.copy()

    parent_ids = []
    for orphan_geometry in orphans['geometry']:
        parent_ids.append(
            find_parent_parcel_id(orphan_geometry, has_bldg, bldgs_df))
    orphans['uID'] = parent_ids

    # Orphans and parents now share uIDs: merge them parcel by parcel.
    merged = pd.concat([orphans, has_bldg]).dissolve(by='uID')
    return merged.reset_index()
def split_france_french_guiana(
        self, world: geopandas.GeoDataFrame) -> geopandas.GeoDataFrame:
    """
    Splits up France into two regions: (1) main France and (2) French Guiana.
    This is done because the Natural Earth map combines both these regions
    together (so French Guiana is a part of Europe). But, the UN standard
    region codes groups French Guiana as part of South America.

    :param world: The GeoDataFrame representing the world. It is not
        modified.
    :return: A new GeoDataFrame with France and French Guiana split (or the
        original data frame if French Guiana already exists, France is
        missing, or the geometry could not be split).
    """
    # Local import: DataFrame.append was removed in pandas 2.0 and
    # pandas.concat is its replacement.
    import pandas as pd

    if len(world[world['iso_a3'] == 'GUF']) != 0:
        logger.info(
            'French Guiana [GUF] already exists in world map. Will not attempt to split France region.'
        )
        return world

    france_rows = world[world['iso_a3'] == 'FRA']
    if len(france_rows) == 0:
        # Without this guard `.values[0]` below raises IndexError, although
        # the contract is to hand back the original frame on failure.
        logger.info(
            'France [FRA] not found in world map. Will not attempt to split France region.'
        )
        return world

    split_regions = self.split_geoms_france(france_rows['geometry'].values[0])
    if split_regions is None:
        return world

    world_new = world.copy()
    is_france = world_new['iso_a3'] == 'FRA'

    # Update France geometry
    original_france_entry = world_new.loc[is_france, 'geometry']
    new_france_entry = geopandas.GeoSeries(
        split_regions['FRA'], index=original_france_entry.index)
    world_new.loc[is_france, 'geometry'] = new_france_entry

    # Add French Guiana geometry: start from a copy of the France row so
    # every other attribute is carried over, then overwrite what differs.
    french_guiana_row = world_new.loc[is_france].reset_index(drop=True)
    french_guiana_row['iso_a3'] = 'GUF'
    french_guiana_row['name'] = 'French Guiana'
    french_guiana_row['continent'] = 'South America'
    french_guiana_row['geometry'] = split_regions['GUF']

    return pd.concat([world_new, french_guiana_row])
def get_orphaned_polys(
    tessellations: gpd.GeoDataFrame,
    bldgs: gpd.GeoDataFrame,
) -> Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    The Morphological Tess from Momepy probably has orphaned parcels in it
    resulting from the non-convex block. This splits the parcels into those
    w/ a building and those w/o a building (i.e. orphaned)

    Args:
        tessellations: output dataframe from momepy.Tessellation; needs
            'uID' and 'geometry' columns. It is not modified.
        bldgs: dataframe of building polygons; presumably it also carries
            a 'uID' column, since the join result is read through
            'uID_left' - verify against the caller.

    Returns:
        Two dataframes of parcels w/o and w/ buildings, respectively.
        Geometry type for each parcel is polygon (never multipolygon)
    """
    t = tessellations.copy()

    # Get the multi polys. geom_type is used rather than attribute access
    # to `.type`, which a column named 'type' would shadow.
    t['is_mp'] = t.geom_type.isin(["MultiPolygon"])
    tess_multips = t[t['is_mp']].explode()

    # Sjoin against buildings. The default predicate of sjoin is
    # 'intersects'; leaving it implicit works both with geopandas releases
    # that call the keyword `op` and with those that call it `predicate`.
    tess_multips = gpd.sjoin(tess_multips, bldgs, how='left')

    # Rows without a join partner did not intersect any building.
    orphan_idx = tess_multips['index_right'].isna()
    no_bldg = tess_multips[orphan_idx][['geometry']]
    has_bldg = tess_multips[~orphan_idx][['uID_left', 'geometry']].rename(
        columns={'uID_left': 'uID'})

    # Add back the earlier polygons for completeness
    orig_bldg = t[~t['is_mp']][['uID', 'geometry']]
    has_bldg = pd.concat([has_bldg, orig_bldg])

    # Non-inplace reset_index: the frames above are selections of other
    # frames, and mutating them in place triggers SettingWithCopy warnings.
    has_bldg = has_bldg.reset_index(drop=True)
    no_bldg = no_bldg.reset_index(drop=True)

    return no_bldg, has_bldg
def geocoding(self, data: gpd.GeoDataFrame, field: str):
    """
    Look up every address in ``field`` and attach the result to its row.

    :param data: A geopandas.GeoDataFrame; it is not modified
    :param field: The field of addresses whose latitude & longitude
                  coordinates will be searched-for
    :return: A copy of ``data`` inner-joined with the non-null results of
             ``self.via``, plus ``latitude`` and ``longitude`` columns read
             from the geometry.  Rows whose lookup contains a null value
             are dropped.
    """
    frame = data.copy()

    def _lookup(row):
        return self.via(row[field])

    # Rows with any missing value in the lookup result are discarded, and
    # the inner join then removes them from the output as well.
    located = frame.apply(_lookup, axis=1).dropna(axis=0, how='any')
    frame = frame.join(located, how='inner')

    frame['latitude'] = frame.geometry.y
    frame['longitude'] = frame.geometry.x
    return frame
def pivot_grid(in_gdf: gpd.GeoDataFrame, noise_std_dev) -> pd.DataFrame:
    """
    Pivot the transmit grid into a table of duty cycles with one row per
    ``start_time_ms`` value and one column per start frequency (as a
    string).  Missing values are replaced by zero and the absolute value of
    gaussian noise is added to every duty cycle, to help with singular
    matrix issues when trying to predict future values.

    :param in_gdf: frame with ``duty_cycle``, ``start_freq``,
                   ``start_time_ms`` and ``geometry`` columns; it is not
                   modified
    :param noise_std_dev: standard deviation of the gaussian noise
    :return: plain pandas frame indexed by ``start_time_ms``
    """
    # work on a copy so the source is never touched; NaNs become zeros
    grid = in_gdf.copy().fillna(value=0)

    # noise keeps pyflux away from singular matrices later on; the duty
    # cycles are not clipped afterwards, so they may exceed 1.0
    jitter = np.abs(
        np.random.normal(size=len(grid.index), scale=noise_std_dev))
    grid["duty_cycle"] = grid["duty_cycle"] + jitter

    # back to pure pandas, without the geometry
    table = pd.DataFrame(grid).drop(columns=["geometry"])

    # pyflux wants column values as strings or it blows up
    table["start_freq_str"] = table["start_freq"].apply(str)

    # one column per start frequency; the first value wins on duplicates
    return table.pivot_table(values='duty_cycle',
                             index="start_time_ms",
                             columns='start_freq_str',
                             aggfunc='first')
def __init__(
    self,
    gdf: gpd.GeoDataFrame,
    size_column: str = None,
    mode: int = 1,
    time_limit: int = 300,
) -> None:
    """
    Store the input frame and the settings on the instance.

    :param gdf: input frame; a copy is kept as ``gdf_original`` and a
        version reprojected to EPSG:3857 as ``gdf``
    :param size_column: stored as ``self.size_column``
    :param mode: stored as ``self.mode``
    :param time_limit: stored as ``self.time_limit``
    """
    pristine = gdf.copy()

    # Remember the caller's CRS next to the untouched copy.
    self.crs = gdf.crs
    self.gdf_original = pristine
    self.gdf = pristine.to_crs(3857)

    self.size_column, self.mode, self.time_limit = (
        size_column, mode, time_limit)

    logging.debug("Initialized Cartogram")
def seasonal_average(
        self, hex_data: geopandas.GeoDataFrame) -> geopandas.GeoDataFrame:
    """Compute the row-wise mean per season and over all value columns.

    The season of a value column is the second ``_``-separated token of
    its name.  For each season, the columns whose name ends with that
    season are averaged into a ``<season>_mean`` column; ``total_mean`` is
    the mean over every value column.

    :param hex_data: frame with ``polyid`` and ``geometry`` columns plus
        the value columns.  It is not modified.
    :return: a new frame with ``polyid``, ``geometry``, one
        ``<season>_mean`` column per season and ``total_mean``.
    :raises IndexError: if a value column name contains no ``_``.
    """
    source = hex_data.copy()
    # Explicit copy: columns are added below, and assigning onto a slice
    # of ``source`` triggers pandas' SettingWithCopy warning.
    result = source[['polyid', 'geometry']].copy()

    # De-duplicate while keeping the frame's column order; a plain set
    # would make the order of the output columns vary between runs.
    value_columns = [
        name for name in dict.fromkeys(source.columns)
        if name not in ('geometry', 'polyid')
    ]

    seasons = self.unique([name.split("_")[1] for name in value_columns])
    for season in seasons:
        suffix = '{}'.format(season)
        relevant_fields = [
            name for name in value_columns if name.endswith(suffix)
        ]
        result['{}_mean'.format(season)] = source[relevant_fields].mean(axis=1)

    result['total_mean'] = source[value_columns].mean(axis=1)

    print(result)
    return result
def drop_from_mask(self, mask: gpd.GeoDataFrame) -> int:
    """Drop points contained in the given mask.

    Args:
        mask (:obj:`geopandas.GeoDataFrame`): The mask used to drop internal
            points. It is not modified.

    Note:
        * The mask must be a :py:obj:`_GpsBase` or
          ``geopandas.GeoDataFrame`` object; a series is converted to a
          frame with a single ``geometry`` column.
        * If the mask has a `radius` column, it will be used and drop all
          points at a distance smaller than or equal to the `radius`
          values. Without it, only points at distance 0 are dropped.
        * The points are dropped in place and the index is reset.

    Returns:
        int: The number of dropped points.
    """
    mask = mask.copy()

    if isinstance(mask, pd.Series):
        mask = gpd.GeoDataFrame(mask.to_frame("geometry"), crs=mask.crs)

    # Project the mask if needed
    if self.crs is not None:
        mask = mask.to_crs(self.crs, inplace=False)

    # Get the points included in masks. The accumulator shares the index
    # of `self`: with a default RangeIndex the `|` below would align on
    # labels and mismatch whenever `self` is indexed differently.
    points = self.geometry
    in_mask_pts = pd.Series(False, index=self.index)
    for _, mask_row in mask.iterrows():
        within_radius = (points.distance(mask_row.geometry)
                         <= mask_row.get("radius", 0))
        in_mask_pts = in_mask_pts | within_radius

    # Count the number of points that are going to be dropped; converted
    # to a plain int to match the documented return type.
    n_dropped = int(in_mask_pts.sum())

    # Drop points in mask
    self.drop(in_mask_pts.loc[in_mask_pts].index, inplace=True)
    self.reset_index(drop=True, inplace=True)

    return n_dropped
def sample_aq_to_point_gdf(sampling_gdf: GeoDataFrame,
                           aq_tif_file: str,
                           aq_attr_name: str) -> GeoDataFrame:
    """Samples raster values at the points of a GeoDataFrame.

    The raster is sampled at the (rounded) coordinates of the points in the
    'point_geom' column and the values, rounded to two decimals, are stored
    in a new column.

    Args:
        sampling_gdf: A GeoDataFrame with a 'point_geom' column of points.
            It is not modified.
        aq_tif_file: Path of the raster (GeoTiff) file to sample.
        aq_attr_name: Name of the column that receives the sampled values.
    Returns:
        A copy of sampling_gdf with the additional aq_attr_name column.
    """
    gdf = sampling_gdf.copy()

    # get coordinates of the points as list of tuples
    coords = [(point.x, point.y) for point in gdf['point_geom']]
    coords = round_coordinates(coords)

    # The context manager closes the raster once sampling is done;
    # previously the dataset was left open. sample() is consumed inside
    # the block, while the dataset is still open.
    with rasterio.open(aq_tif_file) as aqi_raster:
        # NOTE(review): `.item()` only works for one-element samples, so
        # this presumably expects a single-band raster - confirm.
        gdf[aq_attr_name] = [
            round(x.item(), 2) for x in aqi_raster.sample(coords)
        ]

    return gdf
def plot_cluster(gdf: geopandas.GeoDataFrame,
                 fig_location: str = None,
                 show_figure: bool = False):
    """Plot the locations of all accidents in the region, grouped into
    clusters.

    Only rows whose ``region`` is ``'JHM'`` are used.  The accident points
    are clustered into 19 groups; each cluster centre is drawn with a
    marker sized and coloured by the number of accidents in the cluster, on
    top of a basemap.  The figure is saved when ``fig_location`` is given
    and shown when ``show_figure`` is true.
    """
    region_gdf = gdf.loc[gdf['region'] == 'JHM']

    # (n, 2) array of point coordinates for the clustering
    xy = np.column_stack([region_gdf.geometry.x, region_gdf.geometry.y])
    kmeans = sklearn.cluster.MiniBatchKMeans(n_clusters=19).fit(xy)

    # Number of accidents per cluster
    labelled = region_gdf.copy()
    labelled['cluster'] = kmeans.labels_
    per_cluster = labelled.dissolve(by='cluster', aggfunc={'p1': 'count'})
    per_cluster = per_cluster.rename(columns={'p1': 'cnt'})

    # Cluster centres as points; after the merge their geometry column is
    # 'geometry_y' and becomes the active geometry.
    centres = kmeans.cluster_centers_
    centre_points = geopandas.GeoDataFrame(
        geometry=geopandas.points_from_xy(centres[:, 0], centres[:, 1]),
        crs='EPSG:5514')
    clusters = per_cluster.merge(centre_points,
                                 left_on='cluster',
                                 right_index=True)
    clusters = clusters.set_geometry('geometry_y')

    clusters_3857 = clusters.to_crs('epsg:3857')
    accidents_3857 = region_gdf.to_crs('epsg:3857')

    fig, ax = plt.subplots(figsize=(16, 12))
    clusters_3857.plot(ax=ax,
                       markersize=clusters_3857['cnt'],
                       column='cnt',
                       legend=True,
                       legend_kwds={'shrink': 0.85},
                       alpha=0.8)
    accidents_3857.plot(ax=ax, color='purple', markersize=1, alpha=0.9)

    # Move the left edge of the view to the right by a fixed offset
    left, right = ax.get_xlim()
    ax.set_xlim(left + 67865, right)

    ax.axis('off')
    ax.title.set_text('Nehody v JHM kraji')
    ctx.add_basemap(ax,
                    crs=clusters_3857.crs.to_string(),
                    source=ctx.providers.Stamen.TonerLite)

    if fig_location is not None:
        fig.savefig(fig_location)
    if show_figure:
        plt.show()
def geocoding(self, data: gpd.GeoDataFrame, field: str):
    """
    Geocode the addresses in ``field`` and spread the result over columns.

    :param data: A data set that includes a field of addresses whose
                 latitude & longitude coordinates will be searched-for; it
                 is not modified
    :param field: The field of addresses whose latitude & longitude
                  coordinates will be searched-for
    :return: A copy of ``data`` with the additional columns locale,
             geometry, address, latitude and longitude.  Where the locale
             of a row is falsy, those derived columns hold a null value.
    """
    frame = data.copy()
    frame['locale'] = self.via(frame[field])

    def _or_none(extract):
        # Wrap an extractor so a falsy locale yields None.
        def _apply(locale):
            if locale:
                return extract(locale)
            return None
        return _apply

    # Column name -> value taken from the locale; insertion order is the
    # order in which the columns are added.
    extractors = {
        'geometry': lambda locale: tuple(locale.point),
        'address': lambda locale: locale.address,
        'latitude': lambda locale: locale.latitude,
        'longitude': lambda locale: locale.longitude,
    }
    for column, extract in extractors.items():
        frame[column] = frame.locale.apply(_or_none(extract))

    return frame
class TestDataFrame(unittest.TestCase):
    """Tests for GeoDataFrame construction, geometry handling and I/O.

    Fixtures created in ``setUp``:

    * ``self.df``  - the NYC boroughs shapefile (5 features)
    * ``self.df2`` - ten points with ``value1``/``value2`` columns
    * ``self.df3`` - a GeoJSON layer that contains null geometries
    """

    def setUp(self):
        N = 10

        nybb_filename = download_nybb()

        self.df = read_file("/nybb_14a_av/nybb.shp", vfs="zip://" + nybb_filename)
        with fiona.open("/nybb_14a_av/nybb.shp", vfs="zip://" + nybb_filename) as f:
            self.schema = f.schema
        # Scratch directory for the to_file tests; removed in tearDown.
        self.tempdir = tempfile.mkdtemp()
        self.boros = self.df["BoroName"]
        self.crs = {"init": "epsg:4326"}
        self.df2 = GeoDataFrame(
            [{"geometry": Point(x, y), "value1": x + y, "value2": x * y}
             for x, y in zip(range(N), range(N))],
            crs=self.crs,
        )
        self.df3 = read_file("examples/null_geom.geojson")
        self.line_paths = self.df3["Name"]

    def tearDown(self):
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        self.assertTrue(type(self.df2) is GeoDataFrame)
        self.assertTrue(self.df2.crs == self.crs)

    def test_different_geo_colname(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y) for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry="location")
        locs = GeoSeries(data["location"], crs=self.crs)
        assert_geoseries_equal(df.geometry, locs)
        self.assertNotIn("geometry", df)
        self.assertEqual(df.geometry.name, "location")
        # internal implementation detail
        self.assertEqual(df._geometry_column_name, "location")

        geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(geom2, crs="dummy_crs")
        self.assertIn("geometry", df2)
        self.assertIn("location", df2)
        self.assertEqual(df2.crs, "dummy_crs")
        self.assertEqual(df2.geometry.crs, "dummy_crs")
        # reset so it outputs okay
        df2.crs = df.crs
        assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs))
        # for right now, non-geometry comes back as series
        assert_geoseries_equal(df2["location"], df["location"],
                               check_series_type=False, check_dtype=False)

    def test_geo_getitem(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y) for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry="location")
        self.assertIsInstance(df.geometry, GeoSeries)
        # A column that is merely *named* 'geometry' must not replace the
        # active geometry column.
        df["geometry"] = df["A"]
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertEqual(df.geometry[0], data["location"][0])
        # good if this changed in the future
        self.assertNotIsInstance(df["geometry"], GeoSeries)
        self.assertIsInstance(df["location"], GeoSeries)

        data["geometry"] = [Point(x + 1, y - 1) for x, y in zip(range(5), range(5))]
        df = GeoDataFrame(data, crs=self.crs)
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertIsInstance(df["geometry"], GeoSeries)
        # good if this changed in the future
        self.assertNotIsInstance(df["location"], GeoSeries)

    def test_geometry_property(self):
        assert_geoseries_equal(self.df.geometry, self.df["geometry"],
                               check_dtype=True, check_index_type=True)

        df = self.df.copy()
        new_geom = [Point(x, y) for x, y in zip(range(len(self.df)),
                                                range(len(self.df)))]
        df.geometry = new_geom

        new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, new_geom)
        assert_geoseries_equal(df["geometry"], new_geom)

        # new crs
        gs = GeoSeries(new_geom, crs="epsg:26018")
        df.geometry = gs
        self.assertEqual(df.crs, "epsg:26018")

    def test_geometry_property_errors(self):
        with self.assertRaises(AttributeError):
            df = self.df.copy()
            del df["geometry"]
            df.geometry

        # list-like error
        with self.assertRaises(ValueError):
            df = self.df2.copy()
            df.geometry = "value1"

        # list-like error
        with self.assertRaises(ValueError):
            df = self.df.copy()
            df.geometry = "apple"

        # non-geometry error
        with self.assertRaises(TypeError):
            df = self.df.copy()
            df.geometry = list(range(df.shape[0]))

        with self.assertRaises(KeyError):
            df = self.df.copy()
            del df["geometry"]
            df["geometry"]

        # ndim error
        with self.assertRaises(ValueError):
            df = self.df.copy()
            df.geometry = df

    def test_set_geometry(self):
        geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        original_geom = self.df.geometry

        df2 = self.df.set_geometry(geom)
        self.assertIsNot(self.df, df2)
        assert_geoseries_equal(df2.geometry, geom)
        assert_geoseries_equal(self.df.geometry, original_geom)
        assert_geoseries_equal(self.df["geometry"], self.df.geometry)
        # unknown column
        with self.assertRaises(ValueError):
            self.df.set_geometry("nonexistent-column")

        # ndim error
        with self.assertRaises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        new_df = self.df.set_geometry(gs)
        self.assertEqual(new_df.crs, "epsg:26018")

        # explicit crs overrides self and dataframe
        new_df = self.df.set_geometry(gs, crs="epsg:27159")
        self.assertEqual(new_df.crs, "epsg:27159")
        self.assertEqual(new_df.geometry.crs, "epsg:27159")

        # Series should use dataframe's
        new_df = self.df.set_geometry(geom.values)
        self.assertEqual(new_df.crs, self.df.crs)
        self.assertEqual(new_df.geometry.crs, self.df.crs)

    def test_set_geometry_col(self):
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df["simplified_geometry"] = g_simplified
        df2 = self.df.set_geometry("simplified_geometry")

        # Drop is false by default
        self.assertIn("simplified_geometry", df2)
        assert_geoseries_equal(df2.geometry, g_simplified)

        # If True, drops column and renames to geometry
        df3 = self.df.set_geometry("simplified_geometry", drop=True)
        self.assertNotIn("simplified_geometry", df3)
        assert_geoseries_equal(df3.geometry, g_simplified)

    def test_set_geometry_inplace(self):
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        self.assertIsNone(ret)
        geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, geom)

    def test_set_geometry_series(self):
        # Test when setting geometry with a Series that
        # alignment will occur
        #
        # Reverse the index order
        # Set the Series to be Point(i,i) where i is the index
        self.df.index = range(len(self.df) - 1, -1, -1)

        d = {}
        for i in range(len(self.df)):
            d[i] = Point(i, i)
        g = GeoSeries(d)
        # At this point, the DataFrame index is [4,3,2,1,0] and the
        # GeoSeries index is [0,1,2,3,4]. Make sure set_geometry aligns
        # them to match indexes
        df = self.df.set_geometry(g)

        for i, r in df.iterrows():
            self.assertAlmostEqual(i, r["geometry"].x)
            self.assertAlmostEqual(i, r["geometry"].y)

    def test_to_json(self):
        text = self.df.to_json()
        data = json.loads(text)
        self.assertTrue(data["type"] == "FeatureCollection")
        self.assertTrue(len(data["features"]) == 5)

    def test_to_json_geom_col(self):
        df = self.df.copy()
        df["geom"] = df["geometry"]
        df["geometry"] = np.arange(len(df))
        df.set_geometry("geom", inplace=True)

        text = df.to_json()
        data = json.loads(text)
        self.assertTrue(data["type"] == "FeatureCollection")
        self.assertTrue(len(data["features"]) == 5)

    def test_to_json_na(self):
        # Set a value as nan and make sure it's written
        self.df.loc[self.df["BoroName"] == "Queens", "Shape_Area"] = np.nan

        text = self.df.to_json()
        data = json.loads(text)
        self.assertTrue(len(data["features"]) == 5)
        for f in data["features"]:
            props = f["properties"]
            self.assertEqual(len(props), 4)
            if props["BoroName"] == "Queens":
                self.assertTrue(props["Shape_Area"] is None)

    def test_to_json_bad_na(self):
        # Check that a bad na argument raises error
        with self.assertRaises(ValueError):
            self.df.to_json(na="garbage")

    def test_to_json_dropna(self):
        self.df.loc[self.df["BoroName"] == "Queens", "Shape_Area"] = np.nan
        self.df.loc[self.df["BoroName"] == "Bronx", "Shape_Leng"] = np.nan

        text = self.df.to_json(na="drop")
        data = json.loads(text)
        self.assertEqual(len(data["features"]), 5)
        for f in data["features"]:
            props = f["properties"]
            if props["BoroName"] == "Queens":
                self.assertEqual(len(props), 3)
                self.assertTrue("Shape_Area" not in props)
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertTrue("Shape_Leng" in props)
            elif props["BoroName"] == "Bronx":
                self.assertEqual(len(props), 3)
                self.assertTrue("Shape_Leng" not in props)
                self.assertTrue("Shape_Area" in props)
            else:
                self.assertEqual(len(props), 4)

    def test_to_json_keepna(self):
        self.df.loc[self.df["BoroName"] == "Queens", "Shape_Area"] = np.nan
        self.df.loc[self.df["BoroName"] == "Bronx", "Shape_Leng"] = np.nan

        text = self.df.to_json(na="keep")
        data = json.loads(text)
        self.assertEqual(len(data["features"]), 5)
        for f in data["features"]:
            props = f["properties"]
            self.assertEqual(len(props), 4)
            if props["BoroName"] == "Queens":
                self.assertTrue(np.isnan(props["Shape_Area"]))
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertTrue("Shape_Leng" in props)
            elif props["BoroName"] == "Bronx":
                self.assertTrue(np.isnan(props["Shape_Leng"]))
                self.assertTrue("Shape_Area" in props)

    def test_copy(self):
        df2 = self.df.copy()
        self.assertTrue(type(df2) is GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, "boros.shp")
        self.df.to_file(tempfilename)
        # Read layer back in
        df = GeoDataFrame.from_file(tempfilename)
        self.assertTrue("geometry" in df)
        self.assertTrue(len(df) == 5)
        # np.all replaces the np.alltrue alias removed in NumPy 2.0
        self.assertTrue(np.all(df["BoroName"].values == self.boros))

        # Write layer with null geometry out to file
        tempfilename = os.path.join(self.tempdir, "null_geom.shp")
        self.df3.to_file(tempfilename)
        # Read layer back in
        df3 = GeoDataFrame.from_file(tempfilename)
        self.assertTrue("geometry" in df3)
        self.assertTrue(len(df3) == 2)
        self.assertTrue(np.all(df3["Name"].values == self.line_paths))

    def test_to_file_types(self):
        """ Test various integer type columns (GH#93) """
        tempfilename = os.path.join(self.tempdir, "int.shp")
        int_types = [
            # `np.int` was only an alias of the builtin and was removed in
            # NumPy 1.24; the builtin yields the same dtype.
            int,
            np.int8,
            np.int16,
            np.int32,
            np.int64,
            np.intp,
            np.uint8,
            np.uint16,
            np.uint32,
            np.uint64,
            # NOTE(review): `np.long` does not exist in every NumPy release
            # - confirm against the supported NumPy range.
            np.long,
        ]
        geometry = self.df2.geometry
        data = dict((str(i), np.arange(len(geometry), dtype=dtype))
                    for i, dtype in enumerate(int_types))
        df = GeoDataFrame(data, geometry=geometry)
        df.to_file(tempfilename)

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        tempfilename = os.path.join(self.tempdir, "test.shp")
        s = GeoDataFrame({"geometry": [Point(0, 0),
                                       Polygon([(0, 0), (1, 0), (1, 1)])]})
        with self.assertRaises(ValueError):
            s.to_file(tempfilename)

    def test_to_file_schema(self):
        """
        Ensure that the file is written according to the schema
        if it is specified

        """
        try:
            from collections import OrderedDict
        except ImportError:
            from ordereddict import OrderedDict

        tempfilename = os.path.join(self.tempdir, "test.shp")
        properties = OrderedDict(
            [
                ("Shape_Leng", "float:19.11"),
                ("BoroName", "str:40"),
                ("BoroCode", "int:10"),
                ("Shape_Area", "float:19.11"),
            ]
        )
        schema = {"geometry": "Polygon", "properties": properties}

        # Take the first 2 features to speed things up a bit
        self.df.iloc[:2].to_file(tempfilename, schema=schema)

        with fiona.open(tempfilename) as f:
            result_schema = f.schema

        self.assertEqual(result_schema, schema)

    def test_bool_index(self):
        # Find boros with 'B' in their name
        df = self.df[self.df["BoroName"].str.contains("B")]
        self.assertTrue(len(df) == 2)
        boros = df["BoroName"].values
        self.assertTrue("Brooklyn" in boros)
        self.assertTrue("Bronx" in boros)
        self.assertTrue(type(df) is GeoDataFrame)

    def test_transform(self):
        # Round trip: projected -> lon/lat -> projected
        df2 = self.df2.copy()
        df2.crs = {"init": "epsg:26918", "no_defs": True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        self.assertTrue(all(df2["geometry"].geom_almost_equals(utm["geometry"], decimal=2)))

    def test_from_features(self):
        nybb_filename = download_nybb()
        with fiona.open("/nybb_14a_av/nybb.shp", vfs="zip://" + nybb_filename) as f:
            features = list(f)
            crs = f.crs

        df = GeoDataFrame.from_features(features, crs=crs)
        df.rename(columns=lambda x: x.lower(), inplace=True)
        validate_boro_df(self, df)
        self.assertTrue(df.crs == crs)

    def test_from_features_unaligned_properties(self):
        p1 = Point(1, 1)
        f1 = {"type": "Feature", "properties": {"a": 0}, "geometry": p1.__geo_interface__}

        p2 = Point(2, 2)
        f2 = {"type": "Feature", "properties": {"b": 1}, "geometry": p2.__geo_interface__}

        p3 = Point(3, 3)
        f3 = {"type": "Feature", "properties": {"a": 2}, "geometry": p3.__geo_interface__}

        df = GeoDataFrame.from_features([f1, f2, f3])

        result = df[["a", "b"]]
        expected = pd.DataFrame.from_dict([{"a": 0, "b": np.nan},
                                           {"a": np.nan, "b": 1},
                                           {"a": 2, "b": np.nan}])
        assert_frame_equal(expected, result)

    def test_from_postgis_default(self):
        con = connect("test_geopandas")
        if con is None or not create_db(self.df):
            raise unittest.case.SkipTest()

        try:
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            if PANDAS_NEW_SQL_API:
                # It's not really a connection, it's an engine
                con = con.connect()
            con.close()

        validate_boro_df(self, df)

    def test_from_postgis_custom_geom_col(self):
        con = connect("test_geopandas")
        if con is None or not create_db(self.df):
            raise unittest.case.SkipTest()

        try:
            sql = """SELECT borocode, boroname, shape_leng, shape_area, geom AS __geometry__ FROM nybb;"""
            df = GeoDataFrame.from_postgis(sql, con, geom_col="__geometry__")
        finally:
            if PANDAS_NEW_SQL_API:
                # It's not really a connection, it's an engine
                con = con.connect()
            con.close()

        validate_boro_df(self, df)

    def test_dataframe_to_geodataframe(self):
        df = pd.DataFrame({"A": range(len(self.df)),
                           "location": list(self.df.geometry)},
                          index=self.df.index)
        gf = df.set_geometry("location", crs=self.df.crs)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        self.assertEqual(gf.geometry.name, "location")
        self.assertNotIn("geometry", gf)

        gf2 = df.set_geometry("location", crs=self.df.crs, drop=True)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf2, GeoDataFrame)
        self.assertEqual(gf2.geometry.name, "geometry")
        self.assertIn("geometry", gf2)
        self.assertNotIn("location", gf2)
        self.assertIn("location", df)

        # should be a copy
        # (.loc replaces the .ix indexer removed in pandas 1.0)
        df.loc[0, "A"] = 100
        self.assertEqual(gf.loc[0, "A"], 0)
        self.assertEqual(gf2.loc[0, "A"], 0)

        with self.assertRaises(ValueError):
            df.set_geometry("location", inplace=True)

    def test_geodataframe_geointerface(self):
        self.assertEqual(self.df.__geo_interface__["type"], "FeatureCollection")
        self.assertEqual(len(self.df.__geo_interface__["features"]), self.df.shape[0])

    def test_geodataframe_geojson_no_bbox(self):
        geo = self.df._to_geo(na="null", show_bbox=False)
        self.assertFalse("bbox" in geo.keys())
        for feature in geo["features"]:
            self.assertFalse("bbox" in feature.keys())

    def test_geodataframe_geojson_bbox(self):
        geo = self.df._to_geo(na="null", show_bbox=True)
        self.assertTrue("bbox" in geo.keys())
        self.assertEqual(len(geo["bbox"]), 4)
        self.assertTrue(isinstance(geo["bbox"], tuple))
        for feature in geo["features"]:
            self.assertTrue("bbox" in feature.keys())
class TestDataFrame:
    """pytest-style tests for ``GeoDataFrame`` construction, geometry handling,
    (de)serialisation and indexing.

    ``setup_method`` provides three fixtures: ``self.df`` (the 'nybb' dataset),
    ``self.df2`` (ten diagonal points with two value columns) and ``self.df3``
    (read from the 'null_geom.geojson' example file).
    """

    def setup_method(self):
        N = 10

        nybb_filename = geopandas.datasets.get_path('nybb')
        self.df = read_file(nybb_filename)
        self.tempdir = tempfile.mkdtemp()
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)
        self.df3 = read_file(
            os.path.join(PACKAGE_DIR, 'examples', 'null_geom.geojson'))

    def teardown_method(self):
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        assert type(self.df2) is GeoDataFrame
        assert self.df2.crs == self.crs

    def test_different_geo_colname(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y) for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        locs = GeoSeries(data['location'], crs=self.crs)
        assert_geoseries_equal(df.geometry, locs)
        assert 'geometry' not in df
        assert df.geometry.name == 'location'
        # internal implementation detail
        assert df._geometry_column_name == 'location'

        geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(geom2, crs='dummy_crs')
        assert 'location' in df2
        assert df2.crs == 'dummy_crs'
        assert df2.geometry.crs == 'dummy_crs'
        # reset so it outputs okay
        df2.crs = df.crs
        assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs))

    def test_geo_getitem(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y) for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        assert isinstance(df.geometry, GeoSeries)
        df['geometry'] = df["A"]
        assert isinstance(df.geometry, GeoSeries)
        assert df.geometry[0] == data['location'][0]
        # good if this changed in the future
        assert not isinstance(df['geometry'], GeoSeries)
        assert isinstance(df['location'], GeoSeries)

        data["geometry"] = [Point(x + 1, y - 1)
                            for x, y in zip(range(5), range(5))]
        df = GeoDataFrame(data, crs=self.crs)
        assert isinstance(df.geometry, GeoSeries)
        assert isinstance(df['geometry'], GeoSeries)
        # good if this changed in the future
        assert not isinstance(df['location'], GeoSeries)

    def test_geometry_property(self):
        assert_geoseries_equal(self.df.geometry, self.df['geometry'],
                               check_dtype=True, check_index_type=True)

        df = self.df.copy()
        new_geom = [Point(x, y) for x, y in zip(range(len(self.df)),
                                                range(len(self.df)))]
        df.geometry = new_geom

        new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, new_geom)
        assert_geoseries_equal(df['geometry'], new_geom)

        # new crs
        gs = GeoSeries(new_geom, crs="epsg:26018")
        df.geometry = gs
        assert df.crs == "epsg:26018"

    def test_geometry_property_errors(self):
        # Setup is kept outside each ``pytest.raises`` body so that only the
        # statement under test can satisfy the expected exception.

        # geometry accessor after the geometry column was deleted
        df = self.df.copy()
        del df['geometry']
        with pytest.raises(AttributeError):
            df.geometry

        # list-like error
        df = self.df2.copy()
        with pytest.raises(ValueError):
            df.geometry = 'value1'

        # list-like error
        df = self.df.copy()
        with pytest.raises(ValueError):
            df.geometry = 'apple'

        # non-geometry error
        df = self.df.copy()
        with pytest.raises(TypeError):
            df.geometry = list(range(df.shape[0]))

        # column lookup after the geometry column was deleted
        df = self.df.copy()
        del df['geometry']
        with pytest.raises(KeyError):
            df['geometry']

        # ndim error
        df = self.df.copy()
        with pytest.raises(ValueError):
            df.geometry = df

    def test_set_geometry(self):
        geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        original_geom = self.df.geometry

        df2 = self.df.set_geometry(geom)
        assert self.df is not df2
        assert_geoseries_equal(df2.geometry, geom)
        assert_geoseries_equal(self.df.geometry, original_geom)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)

        # unknown column
        with pytest.raises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with pytest.raises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        new_df = self.df.set_geometry(gs)
        assert new_df.crs == "epsg:26018"

        # explicit crs overrides self and dataframe
        new_df = self.df.set_geometry(gs, crs="epsg:27159")
        assert new_df.crs == "epsg:27159"
        assert new_df.geometry.crs == "epsg:27159"

        # Series should use dataframe's
        new_df = self.df.set_geometry(geom.values)
        assert new_df.crs == self.df.crs
        assert new_df.geometry.crs == self.df.crs

    def test_set_geometry_col(self):
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry')

        # Drop is false by default
        assert 'simplified_geometry' in df2
        assert_geoseries_equal(df2.geometry, g_simplified)

        # If True, drops column and renames to geometry
        df3 = self.df.set_geometry('simplified_geometry', drop=True)
        assert 'simplified_geometry' not in df3
        assert_geoseries_equal(df3.geometry, g_simplified)

    def test_set_geometry_inplace(self):
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        assert ret is None
        geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, geom)

    def test_set_geometry_series(self):
        # Test when setting geometry with a Series that
        # alignment will occur
        #
        # Reverse the index order
        # Set the Series to be Point(i,i) where i is the index
        self.df.index = range(len(self.df)-1, -1, -1)

        g = GeoSeries({i: Point(i, i) for i in range(len(self.df))})
        # At this point, the DataFrame index is [4,3,2,1,0] and the
        # GeoSeries index is [0,1,2,3,4]. Make sure set_geometry aligns
        # them to match indexes
        df = self.df.set_geometry(g)

        for i, r in df.iterrows():
            assert i == r['geometry'].x
            assert i == r['geometry'].y

    def test_align(self):
        df = self.df2

        res1, res2 = df.align(df)
        assert_geodataframe_equal(res1, df)
        assert_geodataframe_equal(res2, df)

        res1, res2 = df.align(df.copy())
        assert_geodataframe_equal(res1, df)
        assert_geodataframe_equal(res2, df)

        # assert crs is / is not preserved on mixed dataframes
        df_nocrs = df.copy()
        df_nocrs.crs = None
        res1, res2 = df.align(df_nocrs)
        assert_geodataframe_equal(res1, df)
        assert res1.crs is not None
        assert_geodataframe_equal(res2, df_nocrs)
        assert res2.crs is None

        # mixed GeoDataFrame / DataFrame
        df_nogeom = pd.DataFrame(df.drop('geometry', axis=1))
        res1, res2 = df.align(df_nogeom, axis=0)
        assert_geodataframe_equal(res1, df)
        # exact type on purpose: a GeoDataFrame subclass must not leak through
        assert type(res2) is pd.DataFrame
        assert_frame_equal(res2, df_nogeom)

        # same as above but now with actual alignment
        df1 = df.iloc[1:].copy()
        df2 = df.iloc[:-1].copy()

        exp1 = df.copy()
        exp1.iloc[0] = np.nan
        exp2 = df.copy()
        exp2.iloc[-1] = np.nan
        res1, res2 = df1.align(df2)
        assert_geodataframe_equal(res1, exp1)
        assert_geodataframe_equal(res2, exp2)

        df2_nocrs = df2.copy()
        df2_nocrs.crs = None
        exp2_nocrs = exp2.copy()
        exp2_nocrs.crs = None
        res1, res2 = df1.align(df2_nocrs)
        assert_geodataframe_equal(res1, exp1)
        assert res1.crs is not None
        assert_geodataframe_equal(res2, exp2_nocrs)
        assert res2.crs is None

        df2_nogeom = pd.DataFrame(df2.drop('geometry', axis=1))
        exp2_nogeom = pd.DataFrame(exp2.drop('geometry', axis=1))
        res1, res2 = df1.align(df2_nogeom, axis=0)
        assert_geodataframe_equal(res1, exp1)
        assert type(res2) is pd.DataFrame
        assert_frame_equal(res2, exp2_nogeom)

    def test_to_json(self):
        text = self.df.to_json()
        data = json.loads(text)
        assert data['type'] == 'FeatureCollection'
        assert len(data['features']) == 5

    def test_to_json_geom_col(self):
        df = self.df.copy()
        df['geom'] = df['geometry']
        df['geometry'] = np.arange(len(df))
        df.set_geometry('geom', inplace=True)

        text = df.to_json()
        data = json.loads(text)
        assert data['type'] == 'FeatureCollection'
        assert len(data['features']) == 5

    def test_to_json_na(self):
        # Set a value as nan and make sure it's written
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan

        text = self.df.to_json()
        data = json.loads(text)
        assert len(data['features']) == 5
        for f in data['features']:
            props = f['properties']
            assert len(props) == 4
            if props['BoroName'] == 'Queens':
                assert props['Shape_Area'] is None

    def test_to_json_bad_na(self):
        # Check that a bad na argument raises error
        with pytest.raises(ValueError):
            self.df.to_json(na='garbage')

    def test_to_json_dropna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='drop')
        data = json.loads(text)
        assert len(data['features']) == 5
        for f in data['features']:
            props = f['properties']
            if props['BoroName'] == 'Queens':
                assert len(props) == 3
                assert 'Shape_Area' not in props
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                assert 'Shape_Leng' in props
            elif props['BoroName'] == 'Bronx':
                assert len(props) == 3
                assert 'Shape_Leng' not in props
                assert 'Shape_Area' in props
            else:
                assert len(props) == 4

    def test_to_json_keepna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='keep')
        data = json.loads(text)
        assert len(data['features']) == 5
        for f in data['features']:
            props = f['properties']
            assert len(props) == 4
            if props['BoroName'] == 'Queens':
                assert np.isnan(props['Shape_Area'])
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                assert 'Shape_Leng' in props
            elif props['BoroName'] == 'Bronx':
                assert np.isnan(props['Shape_Leng'])
                assert 'Shape_Area' in props

    def test_copy(self):
        df2 = self.df.copy()
        assert type(df2) is GeoDataFrame
        assert self.df.crs == df2.crs

    def test_bool_index(self):
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        assert len(df) == 2
        boros = df['BoroName'].values
        assert 'Brooklyn' in boros
        assert 'Bronx' in boros
        assert type(df) is GeoDataFrame

    def test_coord_slice_points(self):
        assert self.df2.cx[-2:-1, -2:-1].empty
        assert_frame_equal(self.df2, self.df2.cx[:, :])
        assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, :])
        assert_frame_equal(self.df2.loc[5:], self.df2.cx[:, 5:])
        assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, 5:])

    def test_from_features(self):
        nybb_filename = geopandas.datasets.get_path('nybb')
        with fiona.open(nybb_filename) as f:
            features = list(f)
            crs = f.crs

        df = GeoDataFrame.from_features(features, crs=crs)
        validate_boro_df(df, case_sensitive=True)
        assert df.crs == crs

    def test_from_features_unaligned_properties(self):
        p1 = Point(1, 1)
        f1 = {'type': 'Feature',
              'properties': {'a': 0},
              'geometry': p1.__geo_interface__}

        p2 = Point(2, 2)
        f2 = {'type': 'Feature',
              'properties': {'b': 1},
              'geometry': p2.__geo_interface__}

        p3 = Point(3, 3)
        f3 = {'type': 'Feature',
              'properties': {'a': 2},
              'geometry': p3.__geo_interface__}

        df = GeoDataFrame.from_features([f1, f2, f3])

        result = df[['a', 'b']]
        expected = pd.DataFrame.from_dict([{'a': 0, 'b': np.nan},
                                           {'a': np.nan, 'b': 1},
                                           {'a': 2, 'b': np.nan}])
        assert_frame_equal(expected, result)

    def test_from_feature_collection(self):
        data = {'name': ['a', 'b', 'c'],
                'lat': [45, 46, 47.5],
                'lon': [-120, -121.2, -122.9]}

        df = pd.DataFrame(data)
        geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]
        gdf = GeoDataFrame(df, geometry=geometry)
        # from_features returns sorted columns
        expected = gdf[['geometry', 'lat', 'lon', 'name']]

        # test FeatureCollection
        res = GeoDataFrame.from_features(gdf.__geo_interface__)
        assert_frame_equal(res, expected)

        # test list of Features
        res = GeoDataFrame.from_features(gdf.__geo_interface__['features'])
        assert_frame_equal(res, expected)

        # test __geo_interface__ attribute (a GeoDataFrame has one)
        res = GeoDataFrame.from_features(gdf)
        assert_frame_equal(res, expected)

    def test_from_postgis_default(self):
        con = connect('test_geopandas')
        if con is None or not create_postgis(self.df):
            # pytest.skip() raises by itself; no ``raise`` needed
            pytest.skip("PostGIS test database is not available")

        try:
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            con.close()

        validate_boro_df(df, case_sensitive=False)

    def test_from_postgis_custom_geom_col(self):
        con = connect('test_geopandas')
        geom_col = "the_geom"
        if con is None or not create_postgis(self.df, geom_col=geom_col):
            # pytest.skip() raises by itself; no ``raise`` needed
            pytest.skip("PostGIS test database is not available")

        try:
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con, geom_col=geom_col)
        finally:
            con.close()

        validate_boro_df(df, case_sensitive=False)

    def test_dataframe_to_geodataframe(self):
        df = pd.DataFrame({"A": range(len(self.df)),
                           "location": list(self.df.geometry)},
                          index=self.df.index)
        gf = df.set_geometry('location', crs=self.df.crs)
        assert isinstance(df, pd.DataFrame)
        assert isinstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        assert gf.geometry.name == 'location'
        assert 'geometry' not in gf

        gf2 = df.set_geometry('location', crs=self.df.crs, drop=True)
        assert isinstance(df, pd.DataFrame)
        assert isinstance(gf2, GeoDataFrame)
        assert gf2.geometry.name == 'geometry'
        assert 'geometry' in gf2
        assert 'location' not in gf2
        assert 'location' in df

        # should be a copy
        df.loc[0, "A"] = 100
        assert gf.loc[0, "A"] == 0
        assert gf2.loc[0, "A"] == 0

        with pytest.raises(ValueError):
            df.set_geometry('location', inplace=True)

    def test_geodataframe_geointerface(self):
        assert self.df.__geo_interface__['type'] == 'FeatureCollection'
        assert len(self.df.__geo_interface__['features']) == self.df.shape[0]

    def test_geodataframe_iterfeatures(self):
        df = self.df.iloc[:1].copy()
        df.loc[0, 'BoroName'] = np.nan
        # when containing missing values
        # null: output the missing entries as JSON null
        result = list(df.iterfeatures(na='null'))[0]['properties']
        assert result['BoroName'] is None
        # drop: remove the property from the feature.
        result = list(df.iterfeatures(na='drop'))[0]['properties']
        assert 'BoroName' not in result.keys()
        # keep: output the missing entries as NaN
        result = list(df.iterfeatures(na='keep'))[0]['properties']
        assert np.isnan(result['BoroName'])

        # test for checking that the (non-null) features are python scalars and
        # not numpy scalars
        assert type(df.loc[0, 'Shape_Leng']) is np.float64
        # null
        result = list(df.iterfeatures(na='null'))[0]
        assert type(result['properties']['Shape_Leng']) is float
        # drop
        result = list(df.iterfeatures(na='drop'))[0]
        assert type(result['properties']['Shape_Leng']) is float
        # keep
        result = list(df.iterfeatures(na='keep'))[0]
        assert type(result['properties']['Shape_Leng']) is float

        # when only having numerical columns
        df_only_numerical_cols = df[['Shape_Leng', 'Shape_Area', 'geometry']]
        assert type(df_only_numerical_cols.loc[0, 'Shape_Leng']) is np.float64
        # null
        result = list(df_only_numerical_cols.iterfeatures(na='null'))[0]
        assert type(result['properties']['Shape_Leng']) is float
        # drop
        result = list(df_only_numerical_cols.iterfeatures(na='drop'))[0]
        assert type(result['properties']['Shape_Leng']) is float
        # keep
        result = list(df_only_numerical_cols.iterfeatures(na='keep'))[0]
        assert type(result['properties']['Shape_Leng']) is float

    def test_geodataframe_geojson_no_bbox(self):
        geo = self.df._to_geo(na="null", show_bbox=False)
        assert 'bbox' not in geo.keys()
        for feature in geo['features']:
            assert 'bbox' not in feature.keys()

    def test_geodataframe_geojson_bbox(self):
        geo = self.df._to_geo(na="null", show_bbox=True)
        assert 'bbox' in geo.keys()
        assert len(geo['bbox']) == 4
        assert isinstance(geo['bbox'], tuple)
        for feature in geo['features']:
            assert 'bbox' in feature.keys()

    def test_pickle(self):
        filename = os.path.join(self.tempdir, 'df.pkl')
        self.df.to_pickle(filename)
        unpickled = pd.read_pickle(filename)
        assert_frame_equal(self.df, unpickled)
        assert self.df.crs == unpickled.crs
class Exposures(): """geopandas GeoDataFrame with metada and columns (pd.Series) defined in Attributes. Attributes: tag (Tag): metada - information about the source data ref_year (int): metada - reference year value_unit (str): metada - unit of the exposures values latitude (pd.Series): latitude longitude (pd.Series): longitude crs (dict or crs): CRS information inherent to GeoDataFrame. value (pd.Series): a value for each exposure if_ (pd.Series, optional): e.g. if_TC. impact functions id for hazard TC. There might be different hazards defined: if_TC, if_FL, ... If not provided, set to default 'if_' with ids 1 in check(). geometry (pd.Series, optional): geometry of type Point of each instance. Computed in method set_geometry_points(). meta (dict): dictionary containing corresponding raster properties (if any): width, height, crs and transform must be present at least (transform needs to contain upper left corner!). Exposures might not contain all the points of the corresponding raster. Not used in internal computations. deductible (pd.Series, optional): deductible value for each exposure cover (pd.Series, optional): cover value for each exposure category_id (pd.Series, optional): category id for each exposure region_id (pd.Series, optional): region id for each exposure centr_ (pd.Series, optional): e.g. centr_TC. centroids index for hazard TC. There might be different hazards defined: centr_TC, centr_FL, ... Computed in method assign_centroids(). 
""" _metadata = ['tag', 'ref_year', 'value_unit', 'meta'] vars_oblig = ['value', 'latitude', 'longitude'] """Name of the variables needed to compute the impact.""" vars_def = [INDICATOR_IF] """Name of variables that can be computed.""" vars_opt = [ INDICATOR_CENTR, 'deductible', 'cover', 'category_id', 'region_id', 'geometry' ] """Name of the variables that aren't need to compute the impact.""" @property def crs(self): """Coordinate Reference System, refers to the crs attribute of the inherent GeoDataFrame""" try: return self.gdf.crs except AttributeError: return self.meta.get('crs') def __init__(self, *args, **kwargs): """Creates an Exposures object from a GeoDataFrame Parameters ---------- *args : Arguments of the GeoDataFrame constructor **kwargs : Named arguments of the GeoDataFrame constructor, additionally tag : climada.entity.exposures.tag.Tag Exopusres tag ref_year : int Reference Year value_unit : str Unit of the exposed value meta : dict Metadata dictionary """ # meta data try: self.meta = kwargs.pop('meta') if self.meta is None: self.meta = {} if not isinstance(self.meta, dict): raise ValueError("meta must be a dictionary") except KeyError: self.meta = {} LOGGER.info('meta set to default value %s', self.meta) # tag try: self.tag = kwargs.pop('tag') except KeyError: self.tag = self.meta.get('tag', Tag()) if 'tag' not in self.meta: LOGGER.info('tag set to default value %s', self.tag) # reference year try: self.ref_year = kwargs.pop('ref_year') except KeyError: self.ref_year = self.meta.get('ref_year', DEF_REF_YEAR) if 'ref_year' not in self.meta: LOGGER.info('ref_year set to default value %s', self.ref_year) # value unit try: self.value_unit = kwargs.pop('value_unit') except KeyError: self.value_unit = self.meta.get('ref_year', DEF_VALUE_UNIT) if 'value_unit' not in self.meta: LOGGER.info('value_unit set to default value %s', self.value_unit) # remaining generic attributes for mda in type(self)._metadata: if mda not in Exposures._metadata: if mda in 
kwargs: setattr(self, mda, kwargs.pop(mda)) elif mda in self.meta: setattr(self, mda, self.meta[mda]) else: setattr(self, mda, None) # make the data frame self.gdf = GeoDataFrame(*args, **kwargs) # align crs from gdf and meta data if self.gdf.crs: crs = self.gdf.crs # With geopandas 3.1, the crs attribute is not conserved by the constructor # without a geometry column. Therefore the conservation is done 'manually': elif len(args) > 0: try: crs = args[0].crs except AttributeError: crs = None elif 'data' in kwargs: try: crs = kwargs['data'].crs except AttributeError: crs = None else: crs = None # store the crs in the meta dictionary if crs: if self.meta.get('crs') and not u_coord.equal_crs( self.meta.get('crs'), crs): LOGGER.info( 'crs from `meta` argument ignored and overwritten by GeoDataFrame' ' crs: %s', self.gdf.crs) self.meta['crs'] = crs if not self.gdf.crs: self.gdf.crs = crs else: if 'crs' not in self.meta: LOGGER.info('crs set to default value: %s', DEF_CRS) self.meta['crs'] = DEF_CRS self.gdf.crs = self.meta['crs'] def __str__(self): return '\n'.join( [f"{md}: {self.__dict__[md]}" for md in type(self)._metadata] + [f"crs: {self.crs}", "data:", str(self.gdf)]) def check(self): """Check Exposures consistency. Reports missing columns in log messages. If no if_* column is present in the dataframe, a default column 'if_' is added with default impact function id 1. 
""" # mandatory columns for var in self.vars_oblig: if var not in self.gdf.columns: LOGGER.error("%s missing.", var) raise ValueError(f"{var} missing in gdf") # computable columns except if_* for var in sorted(set(self.vars_def).difference([INDICATOR_IF])): if not var in self.gdf.columns: LOGGER.info("%s not set.", var) # special treatment for if_* if INDICATOR_IF in self.gdf.columns: LOGGER.info("Hazard type not set in %s", INDICATOR_IF) elif not any( [col.startswith(INDICATOR_IF) for col in self.gdf.columns]): LOGGER.info("Setting %s to default impact functions ids 1.", INDICATOR_IF) self.gdf[INDICATOR_IF] = 1 # optional columns except centr_* for var in sorted(set(self.vars_opt).difference([INDICATOR_CENTR])): if not var in self.gdf.columns: LOGGER.info("%s not set.", var) # special treatment for centr_* if INDICATOR_CENTR in self.gdf.columns: LOGGER.info("Hazard type not set in %s", INDICATOR_CENTR) elif not any( [col.startswith(INDICATOR_CENTR) for col in self.gdf.columns]): LOGGER.info("%s not set.", INDICATOR_CENTR) # check whether geometry corresponds to lat/lon try: if (self.gdf.geometry.values[0].x != self.gdf.longitude.values[0] or self.gdf.geometry.values[0].y != self.gdf.latitude.values[0]): raise ValueError( "Geometry values do not correspond to latitude and" + " longitude. Use set_geometry_points() or set_lat_lon().") except AttributeError: # no geometry column pass def assign_centroids(self, hazard, method='NN', distance='haversine', threshold=100): """Assign for each exposure coordinate closest hazard coordinate. -1 used for disatances > threshold in point distances. If raster hazard, -1 used for centroids outside raster. Parameters: hazard (Hazard): hazard to match (with raster or vector centroids) method (str, optional): interpolation method to use in vector hazard. Nearest neighbor (NN) default distance (str, optional): distance to use in vector hazard. 
Haversine default threshold (float): distance threshold in km over which no neighbor will be found in vector hazard. Those are assigned with a -1. Default 100 km. """ LOGGER.info('Matching %s exposures with %s centroids.', str(self.gdf.shape[0]), str(hazard.centroids.size)) if not u_coord.equal_crs(self.crs, hazard.centroids.crs): LOGGER.error('Set hazard and exposure to same CRS first!') raise ValueError if hazard.centroids.meta: xres, _, xmin, _, yres, ymin = hazard.centroids.meta[ 'transform'][:6] xmin, ymin = xmin + 0.5 * xres, ymin + 0.5 * yres x_i = np.round( (self.gdf.longitude.values - xmin) / xres).astype(int) y_i = np.round( (self.gdf.latitude.values - ymin) / yres).astype(int) assigned = y_i * hazard.centroids.meta['width'] + x_i assigned[(x_i < 0) | (x_i >= hazard.centroids.meta['width'])] = -1 assigned[(y_i < 0) | (y_i >= hazard.centroids.meta['height'])] = -1 else: coord = np.stack( [self.gdf.latitude.values, self.gdf.longitude.values], axis=1) haz_coord = hazard.centroids.coord if np.array_equal(coord, haz_coord): assigned = np.arange(self.gdf.shape[0]) else: # pairs of floats can be sorted (lexicographically) in NumPy coord_view = coord.view(dtype='float64,float64').reshape(-1) haz_coord_view = haz_coord.view( dtype='float64,float64').reshape(-1) # assign each hazard coordinate to an element in coord using searchsorted coord_sorter = np.argsort(coord_view) haz_assign_idx = np.fmin( coord_sorter.size - 1, np.searchsorted(coord_view, haz_coord_view, side="left", sorter=coord_sorter)) haz_assign_idx = coord_sorter[haz_assign_idx] # determine which of the assignements match exactly haz_match_idx = ( coord_view[haz_assign_idx] == haz_coord_view).nonzero()[0] assigned = np.full_like(coord_sorter, -1) assigned[haz_assign_idx[haz_match_idx]] = haz_match_idx # assign remaining coordinates to their geographically nearest neighbor if haz_match_idx.size != coord_view.size: not_assigned_mask = (assigned == -1) assigned[not_assigned_mask] = interpol_index( 
haz_coord, coord[not_assigned_mask], method=method, distance=distance, threshold=threshold) self.gdf[INDICATOR_CENTR + hazard.tag.haz_type] = assigned def set_geometry_points(self, scheduler=None): """Set geometry attribute of GeoDataFrame with Points from latitude and longitude attributes. Parameters: scheduler (str): used for dask map_partitions. “threads”, “synchronous” or “processes” """ u_coord.set_df_geometry_points(self.gdf, scheduler) def set_lat_lon(self): """Set latitude and longitude attributes from geometry attribute.""" LOGGER.info('Setting latitude and longitude attributes.') self.gdf['latitude'] = self.gdf.geometry[:].y self.gdf['longitude'] = self.gdf.geometry[:].x def set_from_raster(self, file_name, band=1, src_crs=None, window=False, geometry=False, dst_crs=False, transform=None, width=None, height=None, resampling=Resampling.nearest): """Read raster data and set latitude, longitude, value and meta Parameters: file_name (str): file name containing values band (int, optional): bands to read (starting at 1) src_crs (crs, optional): source CRS. Provide it if error without it. 
window (rasterio.windows.Windows, optional): window where data is extracted geometry (shapely.geometry, optional): consider pixels only in shape dst_crs (crs, optional): reproject to given crs transform (rasterio.Affine): affine transformation to apply wdith (float): number of lons for transform height (float): number of lats for transform resampling (rasterio.warp,.Resampling optional): resampling function used for reprojection to dst_crs """ self.tag = Tag() self.tag.file_name = str(file_name) meta, value = u_coord.read_raster(file_name, [band], src_crs, window, geometry, dst_crs, transform, width, height, resampling) ulx, xres, _, uly, _, yres = meta['transform'].to_gdal() lrx = ulx + meta['width'] * xres lry = uly + meta['height'] * yres x_grid, y_grid = np.meshgrid(np.arange(ulx + xres / 2, lrx, xres), np.arange(uly + yres / 2, lry, yres)) try: self.gdf.crs = meta['crs'].to_dict() except AttributeError: self.gdf.crs = meta['crs'] self.gdf['longitude'] = x_grid.flatten() self.gdf['latitude'] = y_grid.flatten() self.gdf['value'] = value.reshape(-1) self.meta = meta def plot_scatter(self, mask=None, ignore_zero=False, pop_name=True, buffer=0.0, extend='neither', axis=None, **kwargs): """Plot exposures geometry's value sum scattered over Earth's map. The plot will we projected according to the current crs. Parameters: mask (np.array, optional): mask to apply to eai_exp plotted. ignore_zero (bool, optional): flag to indicate if zero and negative values are ignored in plot. Default: False pop_name (bool, optional): add names of the populated places buffer (float, optional): border to add to coordinates. Default: 0.0. extend (str, optional): extend border colorbar with arrows. [ 'neither' | 'both' | 'min' | 'max' ] axis (matplotlib.axes._subplots.AxesSubplot, optional): axis to use kwargs (optional): arguments for scatter matplotlib function, e.g. cmap='Greys'. 
Default: 'Wistia' Returns: cartopy.mpl.geoaxes.GeoAxesSubplot """ crs_epsg, _ = u_plot.get_transformation(self.crs) title = self.tag.description cbar_label = 'Value (%s)' % self.value_unit if mask is None: mask = np.ones((self.gdf.shape[0], ), dtype=bool) if ignore_zero: pos_vals = self.gdf.value[mask].values > 0 else: pos_vals = np.ones((self.gdf.value[mask].values.size, ), dtype=bool) value = self.gdf.value[mask][pos_vals].values coord = np.stack([ self.gdf.latitude[mask][pos_vals].values, self.gdf.longitude[mask][pos_vals].values ], axis=1) return u_plot.geo_scatter_from_array(value, coord, cbar_label, title, pop_name, buffer, extend, proj=crs_epsg, axes=axis, **kwargs) def plot_hexbin(self, mask=None, ignore_zero=False, pop_name=True, buffer=0.0, extend='neither', axis=None, **kwargs): """Plot exposures geometry's value sum binned over Earth's map. An other function for the bins can be set through the key reduce_C_function. The plot will we projected according to the current crs. Parameters: mask (np.array, optional): mask to apply to eai_exp plotted. ignore_zero (bool, optional): flag to indicate if zero and negative values are ignored in plot. Default: False pop_name (bool, optional): add names of the populated places buffer (float, optional): border to add to coordinates. Default: 0.0. extend (str, optional): extend border colorbar with arrows. [ 'neither' | 'both' | 'min' | 'max' ] axis (matplotlib.axes._subplots.AxesSubplot, optional): axis to use kwargs (optional): arguments for hexbin matplotlib function, e.g. reduce_C_function=np.average. 
Default: reduce_C_function=np.sum Returns: cartopy.mpl.geoaxes.GeoAxesSubplot """ crs_epsg, _ = u_plot.get_transformation(self.crs) title = self.tag.description cbar_label = 'Value (%s)' % self.value_unit if 'reduce_C_function' not in kwargs: kwargs['reduce_C_function'] = np.sum if mask is None: mask = np.ones((self.gdf.shape[0], ), dtype=bool) if ignore_zero: pos_vals = self.gdf.value[mask].values > 0 else: pos_vals = np.ones((self.gdf.value[mask].values.size, ), dtype=bool) value = self.gdf.value[mask][pos_vals].values coord = np.stack([ self.gdf.latitude[mask][pos_vals].values, self.gdf.longitude[mask][pos_vals].values ], axis=1) return u_plot.geo_bin_from_array(value, coord, cbar_label, title, pop_name, buffer, extend, proj=crs_epsg, axes=axis, **kwargs) def plot_raster(self, res=None, raster_res=None, save_tiff=None, raster_f=lambda x: np.log10((np.fmax(x + 1, 1))), label='value (log10)', scheduler=None, axis=None, **kwargs): """Generate raster from points geometry and plot it using log10 scale: np.log10((np.fmax(raster+1, 1))). Parameters: res (float, optional): resolution of current data in units of latitude and longitude, approximated if not provided. raster_res (float, optional): desired resolution of the raster save_tiff (str, optional): file name to save the raster in tiff format, if provided raster_f (lambda function): transformation to use to data. Default: log10 adding 1. label (str): colorbar label scheduler (str): used for dask map_partitions. 
“threads”, “synchronous” or “processes” axis (matplotlib.axes._subplots.AxesSubplot, optional): axis to use kwargs (optional): arguments for imshow matplotlib function Returns: matplotlib.figure.Figure, cartopy.mpl.geoaxes.GeoAxesSubplot """ if self.meta and self.meta.get('height', 0) * self.meta.get( 'height', 0) == len(self.gdf): raster = self.gdf.value.values.reshape( (self.meta['height'], self.meta['width'])) # check raster starts by upper left corner if self.gdf.latitude.values[0] < self.gdf.latitude.values[-1]: raster = np.flip(raster, axis=0) if self.gdf.longitude.values[0] > self.gdf.longitude.values[-1]: LOGGER.error( 'Points are not ordered according to meta raster.') raise ValueError else: raster, meta = u_coord.points_to_raster(self.gdf, ['value'], res, raster_res, scheduler) raster = raster.reshape((meta['height'], meta['width'])) # save tiff if save_tiff is not None: with rasterio.open(save_tiff, 'w', driver='GTiff', height=meta['height'], width=meta['width'], count=1, dtype=np.float32, crs=self.crs, transform=meta['transform']) as ras_tiff: ras_tiff.write(raster.astype(np.float32), 1) # make plot proj_data, _ = u_plot.get_transformation(self.crs) proj_plot = proj_data if isinstance(proj_data, ccrs.PlateCarree): # use different projections for plot and data to shift the central lon in the plot xmin, ymin, xmax, ymax = u_coord.latlon_bounds( self.gdf.latitude.values, self.gdf.longitude.values) proj_plot = ccrs.PlateCarree(central_longitude=0.5 * (xmin + xmax)) else: xmin, ymin, xmax, ymax = (self.gdf.longitude.min(), self.gdf.latitude.min(), self.gdf.longitude.max(), self.gdf.latitude.max()) if not axis: _, axis = u_plot.make_map(proj=proj_plot) cbar_ax = make_axes_locatable(axis).append_axes('right', size="6.5%", pad=0.1, axes_class=plt.Axes) axis.set_extent((xmin, xmax, ymin, ymax), crs=proj_data) u_plot.add_shapes(axis) imag = axis.imshow(raster_f(raster), **kwargs, origin='upper', extent=(xmin, xmax, ymin, ymax), transform=proj_data) 
plt.colorbar(imag, cax=cbar_ax, label=label) plt.draw() return axis def plot_basemap( self, mask=None, ignore_zero=False, pop_name=True, buffer=0.0, extend='neither', zoom=10, url='http://tile.stamen.com/terrain/tileZ/tileX/tileY.png', axis=None, **kwargs): """Scatter points over satellite image using contextily Parameters: mask (np.array, optional): mask to apply to eai_exp plotted. Same size of the exposures, only the selected indexes will be plot. ignore_zero (bool, optional): flag to indicate if zero and negative values are ignored in plot. Default: False pop_name (bool, optional): add names of the populated places buffer (float, optional): border to add to coordinates. Default: 0.0. extend (str, optional): extend border colorbar with arrows. [ 'neither' | 'both' | 'min' | 'max' ] zoom (int, optional): zoom coefficient used in the satellite image url (str, optional): image source, e.g. ctx.sources.OSM_C axis (matplotlib.axes._subplots.AxesSubplot, optional): axis to use kwargs (optional): arguments for scatter matplotlib function, e.g. cmap='Greys'. Default: 'Wistia' Returns: matplotlib.figure.Figure, cartopy.mpl.geoaxes.GeoAxesSubplot """ if 'geometry' not in self.gdf.columns: self.set_geometry_points() crs_ori = self.crs self.to_crs(epsg=3857, inplace=True) axis = self.plot_scatter(mask, ignore_zero, pop_name, buffer, extend, shapes=False, axis=axis, **kwargs) ctx.add_basemap(axis, zoom, url, origin='upper') axis.set_axis_off() self.to_crs(crs_ori, inplace=True) return axis def write_hdf5(self, file_name): """Write data frame and metadata in hdf5 format Parameters: file_name (str): (path and) file name to write to. 
""" LOGGER.info('Writting %s', file_name) store = pd.HDFStore(file_name) pandas_df = pd.DataFrame(self.gdf) for col in pandas_df.columns: if str(pandas_df[col].dtype) == "geometry": pandas_df[col] = np.asarray(self.gdf[col]) store.put('exposures', pandas_df) var_meta = {} for var in type(self)._metadata: var_meta[var] = getattr(self, var) store.get_storer('exposures').attrs.metadata = var_meta store.close() def read_hdf5(self, file_name): """Read data frame and metadata in hdf5 format Parameters: file_name (str): (path and) file name to read from. Optional Parameters: additional_vars (list): list of additional variable names to read that are not in exposures.base._metadata """ LOGGER.info('Reading %s', file_name) with pd.HDFStore(file_name) as store: self.__init__(store['exposures']) metadata = store.get_storer('exposures').attrs.metadata for key, val in metadata.items(): if key in type(self)._metadata: setattr(self, key, val) if key == 'crs': self.gdf.crs = val def read_mat(self, file_name, var_names=None): """Read MATLAB file and store variables in exposures. Parameters: file_name (str): absolute path file var_names (dict, optional): dictionary containing the name of the MATLAB variables. Default: DEF_VAR_MAT. """ LOGGER.info('Reading %s', file_name) if not var_names: var_names = DEF_VAR_MAT data = u_hdf5.read(file_name) try: data = data[var_names['sup_field_name']] except KeyError: pass try: data = data[var_names['field_name']] exposures = dict() _read_mat_obligatory(exposures, data, var_names) _read_mat_optional(exposures, data, var_names) except KeyError as var_err: LOGGER.error("Not existing variable: %s", str(var_err)) raise var_err self.gdf = GeoDataFrame(data=exposures, crs=self.crs) _read_mat_metadata(self, data, file_name, var_names) # # Extends the according geopandas method # def to_crs(self, crs=None, epsg=None, inplace=False): """Wrapper of the GeoDataFrame.to_crs method. Transform geometries to a new coordinate reference system. 
Transform all geometries in a GeoSeries to a different coordinate reference system. The crs attribute on the current GeoSeries must be set. Either crs in string or dictionary form or an EPSG code may be specified for output. This method will transform all points in all objects. It has no notion or projecting entire geometries. All segments joining points are assumed to be lines in the current projection, not geodesics. Objects crossing the dateline (or other projection boundary) will have undesirable behavior. Parameters: crs : dict or str Output projection parameters as string or in dictionary form. epsg : int EPSG code specifying output projection. inplace : bool, optional, default: False Whether to return a new GeoDataFrame or do the transformation in place. Returns: None if inplace is True else a transformed copy of the exposures object """ if inplace: self.gdf.to_crs(crs, epsg, True) self.meta['crs'] = crs self.set_lat_lon() return None exp = self.copy() exp.to_crs(crs, epsg, True) return exp def plot(self, *args, **kwargs): """Wrapper of the GeoDataFram.plot method""" self.gdf.plot(*args, **kwargs) plot.__doc__ = GeoDataFrame.plot.__doc__ def copy(self, deep=True): """Make a copy of this Exposures object. Parameters ---------- deep (bool): Make a deep copy, i.e. also copy data. Default True. 
Returns ------- Exposures """ gdf = self.gdf.copy(deep=deep) metadata = dict([(md, copy.deepcopy(self.__dict__[md])) for md in type(self)._metadata]) metadata['crs'] = self.crs return type(self)(gdf, **metadata) def write_raster(self, file_name, value_name='value', scheduler=None): """Write value data into raster file with GeoTiff format Parameters: file_name (str): name output file in tif format """ if self.meta and self.meta['height'] * self.meta['width'] == len( self.gdf): raster = self.gdf[value_name].values.reshape( (self.meta['height'], self.meta['width'])) # check raster starts by upper left corner if self.gdf.latitude.values[0] < self.gdf.latitude.values[-1]: raster = np.flip(raster, axis=0) if self.gdf.longitude.values[0] > self.gdf.longitude.values[-1]: LOGGER.error( 'Points are not ordered according to meta raster.') raise ValueError u_coord.write_raster(file_name, raster, self.meta) else: raster, meta = u_coord.points_to_raster(self, [value_name], scheduler=scheduler) u_coord.write_raster(file_name, raster, meta) @staticmethod def concat(exposures_list): """Concatenates Exposures or DataFrame objectss to one Exposures object. Parameters ---------- exposures_list : list of Exposures or DataFrames The list must not be empty with the first item supposed to be an Exposures object. Returns ------- Exposures with the metadata of the first item in the list and the dataframes concatenated. """ exp = exposures_list[0].copy(deep=False) df_list = [ ex.gdf if isinstance(ex, Exposures) else ex for ex in exposures_list ] exp.gdf = GeoDataFrame(pd.concat(df_list, ignore_index=True, sort=False), crs=exp.crs) return exp
class TestSpatialJoinNYBB:
    """Tests of sjoin between the NYBB borough polygons and a set of points
    laid out along the diagonal of the polygons' bounding box."""

    def setup_method(self):
        """Load the borough polygons and build the point frame."""
        nybb_filename = geopandas.datasets.get_path('nybb')
        self.polydf = read_file(nybb_filename)
        self.crs = self.polydf.crs
        N = 20
        b = [int(x) for x in self.polydf.total_bounds]
        # points stepping from the lower-left towards the upper-right corner
        self.pointdf = GeoDataFrame(
            [{'geometry': Point(x, y),
              'pointattr1': x + y,
              'pointattr2': x - y}
             for x, y in zip(range(b[0], b[2], int((b[2]-b[0])/N)),
                             range(b[1], b[3], int((b[3]-b[1])/N)))],
            crs=self.crs)

    def test_geometry_name(self):
        # test sjoin is working with other geometry name
        polydf_original_geom_name = self.polydf.geometry.name
        self.polydf = (self.polydf.rename(columns={'geometry': 'new_geom'})
                                  .set_geometry('new_geom'))
        assert polydf_original_geom_name != self.polydf.geometry.name
        res = sjoin(self.polydf, self.pointdf, how="left")
        assert self.polydf.geometry.name == res.geometry.name

    def test_sjoin_left(self):
        df = sjoin(self.pointdf, self.polydf, how='left')
        assert df.shape == (21, 8)
        for _, row in df.iterrows():
            assert row.geometry.type == 'Point'
        assert 'pointattr1' in df.columns
        assert 'BoroCode' in df.columns

    def test_sjoin_right(self):
        # the inverse of left
        df = sjoin(self.pointdf, self.polydf, how="right")
        df2 = sjoin(self.polydf, self.pointdf, how="left")
        assert df.shape == (12, 8)
        assert df.shape == df2.shape
        for _, row in df.iterrows():
            assert row.geometry.type == 'MultiPolygon'
        for _, row in df2.iterrows():
            assert row.geometry.type == 'MultiPolygon'

    def test_sjoin_inner(self):
        df = sjoin(self.pointdf, self.polydf, how="inner")
        assert df.shape == (11, 8)

    def test_sjoin_op(self):
        # points within polygons
        df = sjoin(self.pointdf, self.polydf, how="left", op="within")
        assert df.shape == (21, 8)
        assert df.loc[1]['BoroName'] == 'Staten Island'

        # points contain polygons? never happens so we should have nulls
        df = sjoin(self.pointdf, self.polydf, how="left", op="contains")
        assert df.shape == (21, 8)
        assert np.isnan(df.loc[1]['Shape_Area'])

    def test_sjoin_bad_op(self):
        # AttributeError: 'Point' object has no attribute 'spandex'
        with pytest.raises(ValueError):
            sjoin(self.pointdf, self.polydf, how="left", op="spandex")

    def test_sjoin_duplicate_column_name(self):
        pointdf2 = self.pointdf.rename(columns={'pointattr1': 'Shape_Area'})
        df = sjoin(pointdf2, self.polydf, how="left")
        assert 'Shape_Area_left' in df.columns
        assert 'Shape_Area_right' in df.columns

    @pytest.mark.parametrize('how', ['left', 'right', 'inner'])
    def test_sjoin_named_index(self, how):
        # original index names should be unchanged
        pointdf2 = self.pointdf.copy()
        pointdf2.index.name = 'pointid'
        # only the side effects on the inputs matter, not the join result
        sjoin(pointdf2, self.polydf, how=how)
        assert pointdf2.index.name == 'pointid'
        assert self.polydf.index.name is None

    def test_sjoin_values(self):
        # GH190
        self.polydf.index = [1, 3, 4, 5, 6]
        df = sjoin(self.pointdf, self.polydf, how='left')
        assert df.shape == (21, 8)
        df = sjoin(self.polydf, self.pointdf, how='left')
        assert df.shape == (12, 8)

    @pytest.mark.skipif(LooseVersion(pd.__version__) < LooseVersion('0.19'),
                        reason=pandas_0_18_problem)
    @pytest.mark.xfail
    def test_no_overlapping_geometry(self):
        # Note: these tests are for correctly returning GeoDataFrame
        # when result of the join is empty
        no_overlap_points = self.pointdf.iloc[17:]
        df_inner = sjoin(no_overlap_points, self.polydf, how='inner')
        df_left = sjoin(no_overlap_points, self.polydf, how='left')
        df_right = sjoin(no_overlap_points, self.polydf, how='right')

        # Recent Pandas development has introduced a new way of handling merges
        # this change has altered the output when no overlapping geometries
        if LooseVersion(pd.__version__) > LooseVersion('0.18.1'):
            right_idxs = pd.Series(range(0, 5), name='index_right',
                                   dtype='int64')
        else:
            right_idxs = pd.Series(name='index_right', dtype='int64')

        expected_crs = {'init': 'epsg:4326', 'no_defs': True}

        expected_inner_df = pd.concat(
            [self.pointdf.iloc[:0],
             pd.Series(name='index_right', dtype='int64'),
             self.polydf.drop('geometry', axis=1).iloc[:0]],
            axis=1)
        expected_inner = GeoDataFrame(expected_inner_df, crs=expected_crs)

        expected_right_df = pd.concat(
            [self.pointdf.drop('geometry', axis=1).iloc[:0],
             pd.concat([pd.Series(name='index_left', dtype='int64'),
                        right_idxs],
                       axis=1),
             self.polydf],
            axis=1)
        expected_right = GeoDataFrame(
            expected_right_df, crs=expected_crs).set_index('index_right')

        expected_left_df = pd.concat(
            [self.pointdf.iloc[17:],
             pd.Series(name='index_right', dtype='int64'),
             self.polydf.iloc[:0].drop('geometry', axis=1)],
            axis=1)
        expected_left = GeoDataFrame(expected_left_df, crs=expected_crs)

        assert expected_inner.equals(df_inner)
        assert expected_right.equals(df_right)
        assert expected_left.equals(df_left)

    @pytest.mark.skip("Not implemented")
    def test_sjoin_outer(self):
        df = sjoin(self.pointdf, self.polydf, how="outer")
        assert df.shape == (21, 8)
class TestDataFrame(unittest.TestCase):
    """GeoDataFrame tests run against the NYBB borough polygons (self.df)
    and a small frame of diagonal points (self.df2)."""

    def setUp(self):
        N = 10

        nybb_filename = download_nybb()

        self.df = read_file('/nybb_13a/nybb.shp',
                            vfs='zip://' + nybb_filename)
        self.tempdir = tempfile.mkdtemp()
        self.boros = np.array(
            ['Staten Island', 'Queens', 'Brooklyn', 'Manhattan', 'Bronx'])
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)

    def tearDown(self):
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        self.assertTrue(type(self.df2) is GeoDataFrame)
        self.assertTrue(self.df2.crs == self.crs)

    def test_different_geo_colname(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y)
                             for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        locs = GeoSeries(data['location'], crs=self.crs)
        assert_geoseries_equal(df.geometry, locs)
        self.assertTrue('geometry' not in df)
        self.assertEqual(df.geometry.name, 'location')
        # internal implementation detail
        self.assertEqual(df._geometry_column_name, 'location')

        geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(geom2, crs='dummy_crs')
        self.assertTrue('geometry' in df2)
        self.assertTrue('location' in df2)
        self.assertEqual(df2.crs, 'dummy_crs')
        self.assertEqual(df2.geometry.crs, 'dummy_crs')
        # reset so it outputs okay
        df2.crs = df.crs
        assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs))
        # for right now, non-geometry comes back as series
        assert_geoseries_equal(df2['location'], df['location'],
                               check_series_type=False, check_dtype=False)

    def test_geo_getitem(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y)
                             for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        self.assertTrue(isinstance(df.geometry, GeoSeries))
        df['geometry'] = df["A"]
        self.assertTrue(isinstance(df.geometry, GeoSeries))
        self.assertEqual(df.geometry[0], data['location'][0])
        # good if this changed in the future
        self.assertTrue(not isinstance(df['geometry'], GeoSeries))
        self.assertTrue(isinstance(df['location'], GeoSeries))

        data["geometry"] = [Point(x + 1, y - 1)
                            for x, y in zip(range(5), range(5))]
        df = GeoDataFrame(data, crs=self.crs)
        self.assertTrue(isinstance(df.geometry, GeoSeries))
        self.assertTrue(isinstance(df['geometry'], GeoSeries))
        # good if this changed in the future
        self.assertTrue(not isinstance(df['location'], GeoSeries))

    def test_geometry_property(self):
        assert_geoseries_equal(self.df.geometry, self.df['geometry'],
                               check_dtype=True, check_index_type=True)

        df = self.df.copy()
        new_geom = [Point(x, y) for x, y in zip(range(len(self.df)),
                                                range(len(self.df)))]
        df.geometry = new_geom

        new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, new_geom)
        assert_geoseries_equal(df['geometry'], new_geom)

        # new crs
        gs = GeoSeries(new_geom, crs="epsg:26018")
        df.geometry = gs
        self.assertEqual(df.crs, "epsg:26018")

    def test_geometry_property_errors(self):
        with self.assertRaises(AttributeError):
            df = self.df.copy()
            del df['geometry']
            df.geometry

        # list-like error
        with self.assertRaises(ValueError):
            df = self.df2.copy()
            df.geometry = 'value1'

        # list-like error
        with self.assertRaises(ValueError):
            df = self.df.copy()
            df.geometry = 'apple'

        # non-geometry error
        with self.assertRaises(TypeError):
            df = self.df.copy()
            # materialize the range so that a real list of non-geometries
            # is assigned on Python 3 as well
            df.geometry = list(range(df.shape[0]))

        with self.assertRaises(KeyError):
            df = self.df.copy()
            del df['geometry']
            df['geometry']

        # ndim error
        with self.assertRaises(ValueError):
            df = self.df.copy()
            df.geometry = df

    def test_set_geometry(self):
        geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        original_geom = self.df.geometry

        df2 = self.df.set_geometry(geom)
        self.assertTrue(self.df is not df2)
        assert_geoseries_equal(df2.geometry, geom)
        assert_geoseries_equal(self.df.geometry, original_geom)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)
        # unknown column
        with self.assertRaises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with self.assertRaises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        new_df = self.df.set_geometry(gs)
        self.assertEqual(new_df.crs, "epsg:26018")

        # explicit crs overrides self and dataframe
        new_df = self.df.set_geometry(gs, crs="epsg:27159")
        self.assertEqual(new_df.crs, "epsg:27159")
        self.assertEqual(new_df.geometry.crs, "epsg:27159")

        # Series should use dataframe's
        new_df = self.df.set_geometry(geom.values)
        self.assertEqual(new_df.crs, self.df.crs)
        self.assertEqual(new_df.geometry.crs, self.df.crs)

    def test_set_geometry_col(self):
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry')

        # Drop is false by default
        self.assertTrue('simplified_geometry' in df2)
        assert_geoseries_equal(df2.geometry, g_simplified)

        # If True, drops column and renames to geometry
        df3 = self.df.set_geometry('simplified_geometry', drop=True)
        self.assertTrue('simplified_geometry' not in df3)
        assert_geoseries_equal(df3.geometry, g_simplified)

    def test_set_geometry_inplace(self):
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        self.assertTrue(ret is None)
        geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, geom)

    def test_to_json(self):
        text = self.df.to_json()
        data = json.loads(text)
        self.assertTrue(data['type'] == 'FeatureCollection')
        self.assertTrue(len(data['features']) == 5)

    def test_to_json_na(self):
        # Set a value as nan and make sure it's written
        # (.loc writes into self.df itself; chained indexing may only
        # modify a temporary copy)
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan

        text = self.df.to_json()
        data = json.loads(text)
        self.assertTrue(len(data['features']) == 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertTrue(props['Shape_Area'] is None)

    def test_to_json_dropna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='drop')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            if props['BoroName'] == 'Queens':
                self.assertEqual(len(props), 3)
                self.assertTrue('Shape_Area' not in props)
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertTrue('Shape_Leng' in props)
            elif props['BoroName'] == 'Bronx':
                self.assertEqual(len(props), 3)
                self.assertTrue('Shape_Leng' not in props)
                self.assertTrue('Shape_Area' in props)
            else:
                self.assertEqual(len(props), 4)

    def test_to_json_keepna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='keep')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertTrue(np.isnan(props['Shape_Area']))
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertTrue('Shape_Leng' in props)
            elif props['BoroName'] == 'Bronx':
                self.assertTrue(np.isnan(props['Shape_Leng']))
                self.assertTrue('Shape_Area' in props)

    def test_copy(self):
        df2 = self.df.copy()
        self.assertTrue(type(df2) is GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in?
        df = GeoDataFrame.from_file(tempfilename)
        self.assertTrue('geometry' in df)
        self.assertTrue(len(df) == 5)
        self.assertTrue(np.all(df['BoroName'].values == self.boros))

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        tempfilename = os.path.join(self.tempdir, 'test.shp')
        s = GeoDataFrame({'geometry': [Point(0, 0),
                                       Polygon([(0, 0), (1, 0), (1, 1)])]})
        with self.assertRaises(ValueError):
            s.to_file(tempfilename)

    def test_bool_index(self):
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        self.assertTrue(len(df) == 2)
        boros = df['BoroName'].values
        self.assertTrue('Brooklyn' in boros)
        self.assertTrue('Bronx' in boros)
        self.assertTrue(type(df) is GeoDataFrame)

    def test_transform(self):
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        self.assertTrue(all(df2['geometry'].geom_almost_equals(
            utm['geometry'], decimal=2)))

    def test_from_features(self):
        nybb_filename = download_nybb()
        with fiona.open('/nybb_13a/nybb.shp',
                        vfs='zip://' + nybb_filename) as f:
            features = list(f)
            crs = f.crs

        df = GeoDataFrame.from_features(features, crs=crs)
        df.rename(columns=lambda x: x.lower(), inplace=True)
        validate_boro_df(self, df)
        self.assertTrue(df.crs == crs)

    def test_from_postgis_default(self):
        con = connect('test_geopandas')
        if con is None:
            raise unittest.case.SkipTest()

        try:
            # skip from inside the try block so that the connection is
            # closed in that case too
            if not create_db(self.df):
                raise unittest.case.SkipTest()
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            con.close()

        validate_boro_df(self, df)

    def test_from_postgis_custom_geom_col(self):
        con = connect('test_geopandas')
        if con is None:
            raise unittest.case.SkipTest()

        try:
            # skip from inside the try block so that the connection is
            # closed in that case too
            if not create_db(self.df):
                raise unittest.case.SkipTest()
            sql = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
            df = GeoDataFrame.from_postgis(sql, con, geom_col='__geometry__')
        finally:
            con.close()

        validate_boro_df(self, df)

    def test_dataframe_to_geodataframe(self):
        df = pd.DataFrame({"A": range(len(self.df)),
                           "location": list(self.df.geometry)},
                          index=self.df.index)
        gf = df.set_geometry('location', crs=self.df.crs)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        self.assertEqual(gf.geometry.name, 'location')
        self.assertTrue('geometry' not in gf)

        gf2 = df.set_geometry('location', crs=self.df.crs, drop=True)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf2, GeoDataFrame)
        self.assertEqual(gf2.geometry.name, 'geometry')
        self.assertTrue('geometry' in gf2)
        self.assertTrue('location' not in gf2)
        self.assertTrue('location' in df)

        # should be a copy
        # (label-based .loc replaces the removed .ix indexer; df shares the
        # index of self.df, on which this test addresses row label 0)
        df.loc[0, "A"] = 100
        self.assertEqual(gf.loc[0, "A"], 0)
        self.assertEqual(gf2.loc[0, "A"], 0)

        with self.assertRaises(ValueError):
            df.set_geometry('location', inplace=True)
for i in range(len(LISST_ADCP)): x_crd = LISST_ADCP.iloc[i]['Latitude'] y_crd = LISST_ADCP.iloc[i]['Longitude'] crd_pt = Point(y_crd, x_crd) crd_ls.append(crd_pt) LISST_ADCP = LISST_ADCP[[ '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', 'D50', 'SSC', 'WaterTemp', 'Latitude', 'Longitude', 'MeanDepth', 'VertiDepth' ]] LISST_ADCP['geometry'] = crd_ls LISST_ADCP_gdf = GeoDataFrame(LISST_ADCP, crs="EPSG:4326", geometry='geometry') LISST_ADCP_gdf1 = LISST_ADCP_gdf.copy() LISST_ADCP_gdf2 = LISST_ADCP_gdf.copy() avg_dict = dict() for i in tqdm(range(len(LISST_ADCP_gdf1))): curi_x = LISST_ADCP_gdf1.iloc[i].geometry.x curi_y = LISST_ADCP_gdf1.iloc[i].geometry.y avg_ls = [] if i > 0: temp = np.concatenate(list(avg_dict.values())) if i not in temp: for j in range(i, len(LISST_ADCP_gdf2)):
class TestDataFrame: def setup_method(self): N = 10 nybb_filename = geopandas.datasets.get_path('nybb') self.df = read_file(nybb_filename) self.tempdir = tempfile.mkdtemp() self.boros = self.df['BoroName'] self.crs = {'init': 'epsg:4326'} self.df2 = GeoDataFrame([{ 'geometry': Point(x, y), 'value1': x + y, 'value2': x * y } for x, y in zip(range(N), range(N))], crs=self.crs) self.df3 = read_file( os.path.join(PACKAGE_DIR, 'examples', 'null_geom.geojson')) self.line_paths = self.df3['Name'] def teardown_method(self): shutil.rmtree(self.tempdir) def test_df_init(self): assert type(self.df2) is GeoDataFrame assert self.df2.crs == self.crs def test_different_geo_colname(self): data = { "A": range(5), "B": range(-5, 0), "location": [Point(x, y) for x, y in zip(range(5), range(5))] } df = GeoDataFrame(data, crs=self.crs, geometry='location') locs = GeoSeries(data['location'], crs=self.crs) assert_geoseries_equal(df.geometry, locs) assert 'geometry' not in df assert df.geometry.name == 'location' # internal implementation detail assert df._geometry_column_name == 'location' geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))] df2 = df.set_geometry(geom2, crs='dummy_crs') assert 'location' in df2 assert df2.crs == 'dummy_crs' assert df2.geometry.crs == 'dummy_crs' # reset so it outputs okay df2.crs = df.crs assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs)) def test_geo_getitem(self): data = { "A": range(5), "B": range(-5, 0), "location": [Point(x, y) for x, y in zip(range(5), range(5))] } df = GeoDataFrame(data, crs=self.crs, geometry='location') assert isinstance(df.geometry, GeoSeries) df['geometry'] = df["A"] assert isinstance(df.geometry, GeoSeries) assert df.geometry[0] == data['location'][0] # good if this changed in the future assert not isinstance(df['geometry'], GeoSeries) assert isinstance(df['location'], GeoSeries) data["geometry"] = [ Point(x + 1, y - 1) for x, y in zip(range(5), range(5)) ] df = GeoDataFrame(data, crs=self.crs) 
assert isinstance(df.geometry, GeoSeries) assert isinstance(df['geometry'], GeoSeries) # good if this changed in the future assert not isinstance(df['location'], GeoSeries) def test_geometry_property(self): assert_geoseries_equal(self.df.geometry, self.df['geometry'], check_dtype=True, check_index_type=True) df = self.df.copy() new_geom = [ Point(x, y) for x, y in zip(range(len(self.df)), range(len(self.df))) ] df.geometry = new_geom new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs) assert_geoseries_equal(df.geometry, new_geom) assert_geoseries_equal(df['geometry'], new_geom) # new crs gs = GeoSeries(new_geom, crs="epsg:26018") df.geometry = gs assert df.crs == "epsg:26018" def test_geometry_property_errors(self): with pytest.raises(AttributeError): df = self.df.copy() del df['geometry'] df.geometry # list-like error with pytest.raises(ValueError): df = self.df2.copy() df.geometry = 'value1' # list-like error with pytest.raises(ValueError): df = self.df.copy() df.geometry = 'apple' # non-geometry error with pytest.raises(TypeError): df = self.df.copy() df.geometry = list(range(df.shape[0])) with pytest.raises(KeyError): df = self.df.copy() del df['geometry'] df['geometry'] # ndim error with pytest.raises(ValueError): df = self.df.copy() df.geometry = df def test_set_geometry(self): geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))]) original_geom = self.df.geometry df2 = self.df.set_geometry(geom) assert self.df is not df2 assert_geoseries_equal(df2.geometry, geom) assert_geoseries_equal(self.df.geometry, original_geom) assert_geoseries_equal(self.df['geometry'], self.df.geometry) # unknown column with pytest.raises(ValueError): self.df.set_geometry('nonexistent-column') # ndim error with pytest.raises(ValueError): self.df.set_geometry(self.df) # new crs - setting should default to GeoSeries' crs gs = GeoSeries(geom, crs="epsg:26018") new_df = self.df.set_geometry(gs) assert new_df.crs == "epsg:26018" # explicit crs overrides self and 
dataframe new_df = self.df.set_geometry(gs, crs="epsg:27159") assert new_df.crs == "epsg:27159" assert new_df.geometry.crs == "epsg:27159" # Series should use dataframe's new_df = self.df.set_geometry(geom.values) assert new_df.crs == self.df.crs assert new_df.geometry.crs == self.df.crs def test_set_geometry_col(self): g = self.df.geometry g_simplified = g.simplify(100) self.df['simplified_geometry'] = g_simplified df2 = self.df.set_geometry('simplified_geometry') # Drop is false by default assert 'simplified_geometry' in df2 assert_geoseries_equal(df2.geometry, g_simplified) # If True, drops column and renames to geometry df3 = self.df.set_geometry('simplified_geometry', drop=True) assert 'simplified_geometry' not in df3 assert_geoseries_equal(df3.geometry, g_simplified) def test_set_geometry_inplace(self): geom = [Point(x, y) for x, y in zip(range(5), range(5))] ret = self.df.set_geometry(geom, inplace=True) assert ret is None geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs) assert_geoseries_equal(self.df.geometry, geom) def test_set_geometry_series(self): # Test when setting geometry with a Series that # alignment will occur # # Reverse the index order # Set the Series to be Point(i,i) where i is the index self.df.index = range(len(self.df) - 1, -1, -1) d = {} for i in range(len(self.df)): d[i] = Point(i, i) g = GeoSeries(d) # At this point, the DataFrame index is [4,3,2,1,0] and the # GeoSeries index is [0,1,2,3,4]. 
Make sure set_geometry aligns # them to match indexes df = self.df.set_geometry(g) for i, r in df.iterrows(): assert i == r['geometry'].x assert i == r['geometry'].y def test_to_json(self): text = self.df.to_json() data = json.loads(text) assert data['type'] == 'FeatureCollection' assert len(data['features']) == 5 def test_to_json_geom_col(self): df = self.df.copy() df['geom'] = df['geometry'] df['geometry'] = np.arange(len(df)) df.set_geometry('geom', inplace=True) text = df.to_json() data = json.loads(text) assert data['type'] == 'FeatureCollection' assert len(data['features']) == 5 def test_to_json_na(self): # Set a value as nan and make sure it's written self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan text = self.df.to_json() data = json.loads(text) assert len(data['features']) == 5 for f in data['features']: props = f['properties'] assert len(props) == 4 if props['BoroName'] == 'Queens': assert props['Shape_Area'] is None def test_to_json_bad_na(self): # Check that a bad na argument raises error with pytest.raises(ValueError): self.df.to_json(na='garbage') def test_to_json_dropna(self): self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan text = self.df.to_json(na='drop') data = json.loads(text) assert len(data['features']) == 5 for f in data['features']: props = f['properties'] if props['BoroName'] == 'Queens': assert len(props) == 3 assert 'Shape_Area' not in props # Just make sure setting it to nan in a different row # doesn't affect this one assert 'Shape_Leng' in props elif props['BoroName'] == 'Bronx': assert len(props) == 3 assert 'Shape_Leng' not in props assert 'Shape_Area' in props else: assert len(props) == 4 def test_to_json_keepna(self): self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan text = self.df.to_json(na='keep') data = json.loads(text) assert 
len(data['features']) == 5 for f in data['features']: props = f['properties'] assert len(props) == 4 if props['BoroName'] == 'Queens': assert np.isnan(props['Shape_Area']) # Just make sure setting it to nan in a different row # doesn't affect this one assert 'Shape_Leng' in props elif props['BoroName'] == 'Bronx': assert np.isnan(props['Shape_Leng']) assert 'Shape_Area' in props def test_copy(self): df2 = self.df.copy() assert type(df2) is GeoDataFrame assert self.df.crs == df2.crs def test_to_file(self): """ Test to_file and from_file """ tempfilename = os.path.join(self.tempdir, 'boros.shp') self.df.to_file(tempfilename) # Read layer back in df = GeoDataFrame.from_file(tempfilename) assert 'geometry' in df assert len(df) == 5 assert np.alltrue(df['BoroName'].values == self.boros) # Write layer with null geometry out to file tempfilename = os.path.join(self.tempdir, 'null_geom.shp') self.df3.to_file(tempfilename) # Read layer back in df3 = GeoDataFrame.from_file(tempfilename) assert 'geometry' in df3 assert len(df3) == 2 assert np.alltrue(df3['Name'].values == self.line_paths) def test_to_file_types(self): """ Test various integer type columns (GH#93) """ tempfilename = os.path.join(self.tempdir, 'int.shp') int_types = [ np.int, np.int8, np.int16, np.int32, np.int64, np.intp, np.uint8, np.uint16, np.uint32, np.uint64, np.long ] geometry = self.df2.geometry data = dict((str(i), np.arange(len(geometry), dtype=dtype)) for i, dtype in enumerate(int_types)) df = GeoDataFrame(data, geometry=geometry) df.to_file(tempfilename) def test_mixed_types_to_file(self): """ Test that mixed geometry types raise error when writing to file """ tempfilename = os.path.join(self.tempdir, 'test.shp') s = GeoDataFrame( {'geometry': [Point(0, 0), Polygon([(0, 0), (1, 0), (1, 1)])]}) with pytest.raises(ValueError): s.to_file(tempfilename) def test_to_file_schema(self): """ Ensure that the file is written according to the schema if it is specified """ from collections import OrderedDict 
tempfilename = os.path.join(self.tempdir, 'test.shp') properties = OrderedDict([ ('Shape_Leng', 'float:19.11'), ('BoroName', 'str:40'), ('BoroCode', 'int:10'), ('Shape_Area', 'float:19.11'), ]) schema = {'geometry': 'Polygon', 'properties': properties} # Take the first 2 features to speed things up a bit self.df.iloc[:2].to_file(tempfilename, schema=schema) with fiona.open(tempfilename) as f: result_schema = f.schema assert result_schema == schema def test_bool_index(self): # Find boros with 'B' in their name df = self.df[self.df['BoroName'].str.contains('B')] assert len(df) == 2 boros = df['BoroName'].values assert 'Brooklyn' in boros assert 'Bronx' in boros assert type(df) is GeoDataFrame def test_coord_slice_points(self): assert self.df2.cx[-2:-1, -2:-1].empty assert_frame_equal(self.df2, self.df2.cx[:, :]) assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, :]) assert_frame_equal(self.df2.loc[5:], self.df2.cx[:, 5:]) assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, 5:]) def test_transform(self): df2 = self.df2.copy() df2.crs = {'init': 'epsg:26918', 'no_defs': True} lonlat = df2.to_crs(epsg=4326) utm = lonlat.to_crs(epsg=26918) assert all(df2['geometry'].geom_almost_equals(utm['geometry'], decimal=2)) def test_to_crs_geo_column_name(self): # Test to_crs() with different geometry column name (GH#339) df2 = self.df2.copy() df2.crs = {'init': 'epsg:26918', 'no_defs': True} df2 = df2.rename(columns={'geometry': 'geom'}) df2.set_geometry('geom', inplace=True) lonlat = df2.to_crs(epsg=4326) utm = lonlat.to_crs(epsg=26918) assert lonlat.geometry.name == 'geom' assert utm.geometry.name == 'geom' assert all(df2.geometry.geom_almost_equals(utm.geometry, decimal=2)) def test_from_features(self): nybb_filename = geopandas.datasets.get_path('nybb') with fiona.open(nybb_filename) as f: features = list(f) crs = f.crs df = GeoDataFrame.from_features(features, crs=crs) validate_boro_df(df, case_sensitive=True) assert df.crs == crs def 
test_from_features_unaligned_properties(self): p1 = Point(1, 1) f1 = { 'type': 'Feature', 'properties': { 'a': 0 }, 'geometry': p1.__geo_interface__ } p2 = Point(2, 2) f2 = { 'type': 'Feature', 'properties': { 'b': 1 }, 'geometry': p2.__geo_interface__ } p3 = Point(3, 3) f3 = { 'type': 'Feature', 'properties': { 'a': 2 }, 'geometry': p3.__geo_interface__ } df = GeoDataFrame.from_features([f1, f2, f3]) result = df[['a', 'b']] expected = pd.DataFrame.from_dict([{ 'a': 0, 'b': np.nan }, { 'a': np.nan, 'b': 1 }, { 'a': 2, 'b': np.nan }]) assert_frame_equal(expected, result) def test_from_feature_collection(self): data = { 'name': ['a', 'b', 'c'], 'lat': [45, 46, 47.5], 'lon': [-120, -121.2, -122.9] } df = pd.DataFrame(data) geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])] gdf = GeoDataFrame(df, geometry=geometry) # from_features returns sorted columns expected = gdf[['geometry', 'lat', 'lon', 'name']] # test FeatureCollection res = GeoDataFrame.from_features(gdf.__geo_interface__) assert_frame_equal(res, expected) # test list of Features res = GeoDataFrame.from_features(gdf.__geo_interface__['features']) assert_frame_equal(res, expected) # test __geo_interface__ attribute (a GeoDataFrame has one) res = GeoDataFrame.from_features(gdf) assert_frame_equal(res, expected) def test_from_postgis_default(self): con = connect('test_geopandas') if con is None or not create_db(self.df): raise pytest.skip() try: sql = "SELECT * FROM nybb;" df = GeoDataFrame.from_postgis(sql, con) finally: con.close() validate_boro_df(df, case_sensitive=False) def test_from_postgis_custom_geom_col(self): con = connect('test_geopandas') if con is None or not create_db(self.df): raise pytest.skip() try: sql = """SELECT borocode, boroname, shape_leng, shape_area, geom AS __geometry__ FROM nybb;""" df = GeoDataFrame.from_postgis(sql, con, geom_col='__geometry__') finally: con.close() validate_boro_df(df, case_sensitive=False) def test_dataframe_to_geodataframe(self): df = pd.DataFrame( { "A": 
range(len(self.df)), "location": list(self.df.geometry) }, index=self.df.index) gf = df.set_geometry('location', crs=self.df.crs) assert isinstance(df, pd.DataFrame) assert isinstance(gf, GeoDataFrame) assert_geoseries_equal(gf.geometry, self.df.geometry) assert gf.geometry.name == 'location' assert 'geometry' not in gf gf2 = df.set_geometry('location', crs=self.df.crs, drop=True) assert isinstance(df, pd.DataFrame) assert isinstance(gf2, GeoDataFrame) assert gf2.geometry.name == 'geometry' assert 'geometry' in gf2 assert 'location' not in gf2 assert 'location' in df # should be a copy df.ix[0, "A"] = 100 assert gf.ix[0, "A"] == 0 assert gf2.ix[0, "A"] == 0 with pytest.raises(ValueError): df.set_geometry('location', inplace=True) def test_geodataframe_geointerface(self): assert self.df.__geo_interface__['type'] == 'FeatureCollection' assert len(self.df.__geo_interface__['features']) == self.df.shape[0] def test_geodataframe_geojson_no_bbox(self): geo = self.df._to_geo(na="null", show_bbox=False) assert 'bbox' not in geo.keys() for feature in geo['features']: assert 'bbox' not in feature.keys() def test_geodataframe_geojson_bbox(self): geo = self.df._to_geo(na="null", show_bbox=True) assert 'bbox' in geo.keys() assert len(geo['bbox']) == 4 assert isinstance(geo['bbox'], tuple) for feature in geo['features']: assert 'bbox' in feature.keys() def test_pickle(self): filename = os.path.join(self.tempdir, 'df.pkl') self.df.to_pickle(filename) unpickled = pd.read_pickle(filename) assert_frame_equal(self.df, unpickled) assert self.df.crs == unpickled.crs
class TestDataFrame(unittest.TestCase):
    """Tests for ``GeoDataFrame`` built from the ``nybb`` boroughs shapefile.

    ``self.df`` is the shapefile read through ``read_file``; ``self.df2`` is
    a small synthetic frame of ``Point(i, i)`` geometries.  The tests cover
    geometry-column handling, GeoJSON output, file and PostGIS round trips,
    and CRS transforms.
    """

    def setUp(self):
        N = 10

        nybb_filename = download_nybb()

        self.df = read_file('/nybb_14a_av/nybb.shp',
                            vfs='zip://' + nybb_filename)
        self.boros = self.df['BoroName']
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)
        # Create the scratch directory last: tearDown (which removes it) is
        # not run when setUp raises, so nothing that can fail follows it.
        self.tempdir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        self.assertIs(type(self.df2), GeoDataFrame)
        self.assertEqual(self.df2.crs, self.crs)

    def test_different_geo_colname(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y)
                             for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        locs = GeoSeries(data['location'], crs=self.crs)
        assert_geoseries_equal(df.geometry, locs)
        self.assertNotIn('geometry', df)
        self.assertEqual(df.geometry.name, 'location')
        # internal implementation detail
        self.assertEqual(df._geometry_column_name, 'location')

        geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(geom2, crs='dummy_crs')
        self.assertIn('geometry', df2)
        self.assertIn('location', df2)
        self.assertEqual(df2.crs, 'dummy_crs')
        self.assertEqual(df2.geometry.crs, 'dummy_crs')
        # reset so it outputs okay
        df2.crs = df.crs
        assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs))
        # for right now, non-geometry comes back as series
        assert_geoseries_equal(df2['location'], df['location'],
                               check_series_type=False, check_dtype=False)

    def test_geo_getitem(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y)
                             for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        self.assertIsInstance(df.geometry, GeoSeries)
        # A plain column that merely happens to be called 'geometry' must
        # not replace the active geometry column ('location').
        df['geometry'] = df["A"]
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertEqual(df.geometry[0], data['location'][0])
        # good if this changed in the future
        self.assertNotIsInstance(df['geometry'], GeoSeries)
        self.assertIsInstance(df['location'], GeoSeries)

        data["geometry"] = [Point(x + 1, y - 1)
                            for x, y in zip(range(5), range(5))]
        df = GeoDataFrame(data, crs=self.crs)
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertIsInstance(df['geometry'], GeoSeries)
        # good if this changed in the future
        self.assertNotIsInstance(df['location'], GeoSeries)

    def test_geometry_property(self):
        assert_geoseries_equal(self.df.geometry, self.df['geometry'],
                               check_dtype=True, check_index_type=True)

        df = self.df.copy()
        new_geom = [Point(x, y) for x, y in zip(range(len(self.df)),
                                                range(len(self.df)))]
        df.geometry = new_geom

        new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, new_geom)
        assert_geoseries_equal(df['geometry'], new_geom)

        # new crs
        gs = GeoSeries(new_geom, crs="epsg:26018")
        df.geometry = gs
        self.assertEqual(df.crs, "epsg:26018")

    def test_geometry_property_errors(self):
        # Only the statement expected to fail lives inside each
        # ``assertRaises`` block, so an unrelated error raised while
        # preparing the frame cannot satisfy the assertion by accident.
        df = self.df.copy()
        del df['geometry']
        with self.assertRaises(AttributeError):
            df.geometry

        # list-like error
        df = self.df2.copy()
        with self.assertRaises(ValueError):
            df.geometry = 'value1'

        # list-like error
        df = self.df.copy()
        with self.assertRaises(ValueError):
            df.geometry = 'apple'

        # non-geometry error
        df = self.df.copy()
        with self.assertRaises(TypeError):
            df.geometry = list(range(df.shape[0]))

        df = self.df.copy()
        del df['geometry']
        with self.assertRaises(KeyError):
            df['geometry']

        # ndim error
        df = self.df.copy()
        with self.assertRaises(ValueError):
            df.geometry = df

    def test_set_geometry(self):
        geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        original_geom = self.df.geometry

        df2 = self.df.set_geometry(geom)

        self.assertIsNot(self.df, df2)
        assert_geoseries_equal(df2.geometry, geom)
        assert_geoseries_equal(self.df.geometry, original_geom)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)
        # unknown column
        with self.assertRaises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with self.assertRaises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        new_df = self.df.set_geometry(gs)
        self.assertEqual(new_df.crs, "epsg:26018")

        # explicit crs overrides self and dataframe
        new_df = self.df.set_geometry(gs, crs="epsg:27159")
        self.assertEqual(new_df.crs, "epsg:27159")
        self.assertEqual(new_df.geometry.crs, "epsg:27159")

        # Series should use dataframe's
        new_df = self.df.set_geometry(geom.values)
        self.assertEqual(new_df.crs, self.df.crs)
        self.assertEqual(new_df.geometry.crs, self.df.crs)

    def test_set_geometry_col(self):
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry')

        # Drop is false by default
        self.assertIn('simplified_geometry', df2)
        assert_geoseries_equal(df2.geometry, g_simplified)

        # If True, drops column and renames to geometry
        df3 = self.df.set_geometry('simplified_geometry', drop=True)
        self.assertNotIn('simplified_geometry', df3)
        assert_geoseries_equal(df3.geometry, g_simplified)

    def test_set_geometry_inplace(self):
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        self.assertIsNone(ret)
        geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, geom)

    def test_set_geometry_series(self):
        # Test when setting geometry with a Series that
        # alignment will occur
        #
        # Reverse the index order
        # Set the Series to be Point(i,i) where i is the index
        self.df.index = range(len(self.df) - 1, -1, -1)

        d = {i: Point(i, i) for i in range(len(self.df))}
        g = GeoSeries(d)
        # At this point, the DataFrame index is [4,3,2,1,0] and the
        # GeoSeries index is [0,1,2,3,4]. Make sure set_geometry aligns
        # them to match indexes
        df = self.df.set_geometry(g)

        for i, r in df.iterrows():
            self.assertAlmostEqual(i, r['geometry'].x)
            self.assertAlmostEqual(i, r['geometry'].y)

    def test_to_json(self):
        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(data['type'], 'FeatureCollection')
        self.assertEqual(len(data['features']), 5)

    def test_to_json_geom_col(self):
        df = self.df.copy()
        df['geom'] = df['geometry']
        df['geometry'] = np.arange(len(df))
        df.set_geometry('geom', inplace=True)

        text = df.to_json()
        data = json.loads(text)
        self.assertEqual(data['type'], 'FeatureCollection')
        self.assertEqual(len(data['features']), 5)

    def test_to_json_na(self):
        # Set a value as nan and make sure it's written.
        # A single .loc assignment is used because chained indexing
        # (df[col][mask] = ...) may write to a temporary copy.
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan

        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertIsNone(props['Shape_Area'])

    def test_to_json_bad_na(self):
        # Check that a bad na argument raises error
        with self.assertRaises(ValueError):
            self.df.to_json(na='garbage')

    def test_to_json_dropna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='drop')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            if props['BoroName'] == 'Queens':
                self.assertEqual(len(props), 3)
                self.assertNotIn('Shape_Area', props)
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertIn('Shape_Leng', props)
            elif props['BoroName'] == 'Bronx':
                self.assertEqual(len(props), 3)
                self.assertNotIn('Shape_Leng', props)
                self.assertIn('Shape_Area', props)
            else:
                self.assertEqual(len(props), 4)

    def test_to_json_keepna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='keep')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertTrue(np.isnan(props['Shape_Area']))
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertIn('Shape_Leng', props)
            elif props['BoroName'] == 'Bronx':
                self.assertTrue(np.isnan(props['Shape_Leng']))
                self.assertIn('Shape_Area', props)

    def test_copy(self):
        df2 = self.df.copy()
        self.assertIs(type(df2), GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in
        df = GeoDataFrame.from_file(tempfilename)
        self.assertIn('geometry', df)
        self.assertEqual(len(df), 5)
        # np.all replaces np.alltrue, which was removed in NumPy 2.0.
        self.assertTrue(np.all(df['BoroName'].values == self.boros))

    def test_to_file_types(self):
        """ Test various integer type columns (GH#93) """
        tempfilename = os.path.join(self.tempdir, 'int.shp')
        # ``np.int`` and ``np.long`` were removed in NumPy 1.24; ``int``
        # and ``np.int_`` select the default integer dtype without
        # relying on those aliases.
        int_types = [int, np.int8, np.int16, np.int32, np.int64, np.intp,
                     np.uint8, np.uint16, np.uint32, np.uint64, np.int_]
        geometry = self.df2.geometry
        data = dict((str(i), np.arange(len(geometry), dtype=dtype))
                    for i, dtype in enumerate(int_types))
        df = GeoDataFrame(data, geometry=geometry)
        df.to_file(tempfilename)

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        tempfilename = os.path.join(self.tempdir, 'test.shp')
        s = GeoDataFrame({'geometry': [Point(0, 0),
                                       Polygon([(0, 0), (1, 0), (1, 1)])]})
        with self.assertRaises(ValueError):
            s.to_file(tempfilename)

    def test_bool_index(self):
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        self.assertEqual(len(df), 2)
        boros = df['BoroName'].values
        self.assertIn('Brooklyn', boros)
        self.assertIn('Bronx', boros)
        self.assertIs(type(df), GeoDataFrame)

    def test_transform(self):
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        # Round trip through lon/lat and back; coordinates must agree to
        # two decimals.
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        self.assertTrue(all(df2['geometry'].geom_almost_equals(
            utm['geometry'], decimal=2)))

    def test_from_features(self):
        nybb_filename = download_nybb()
        with fiona.open('/nybb_14a_av/nybb.shp',
                        vfs='zip://' + nybb_filename) as f:
            features = list(f)
            crs = f.crs

        df = GeoDataFrame.from_features(features, crs=crs)
        df.rename(columns=lambda x: x.lower(), inplace=True)
        validate_boro_df(self, df)
        self.assertEqual(df.crs, crs)

    def test_from_features_unaligned_properties(self):
        p1 = Point(1, 1)
        f1 = {'type': 'Feature',
              'properties': {'a': 0},
              'geometry': p1.__geo_interface__}

        p2 = Point(2, 2)
        f2 = {'type': 'Feature',
              'properties': {'b': 1},
              'geometry': p2.__geo_interface__}

        p3 = Point(3, 3)
        f3 = {'type': 'Feature',
              'properties': {'a': 2},
              'geometry': p3.__geo_interface__}

        df = GeoDataFrame.from_features([f1, f2, f3])

        result = df[['a', 'b']]
        expected = pd.DataFrame.from_dict([{'a': 0, 'b': np.nan},
                                           {'a': np.nan, 'b': 1},
                                           {'a': 2, 'b': np.nan}])
        assert_frame_equal(expected, result)

    def test_from_postgis_default(self):
        con = connect('test_geopandas')
        if con is None or not create_db(self.df):
            raise unittest.case.SkipTest()

        try:
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            if PANDAS_NEW_SQL_API:
                # It's not really a connection, it's an engine
                con = con.connect()
            con.close()

        validate_boro_df(self, df)

    def test_from_postgis_custom_geom_col(self):
        con = connect('test_geopandas')
        if con is None or not create_db(self.df):
            raise unittest.case.SkipTest()

        try:
            sql = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
            df = GeoDataFrame.from_postgis(sql, con, geom_col='__geometry__')
        finally:
            if PANDAS_NEW_SQL_API:
                # It's not really a connection, it's an engine
                con = con.connect()
            con.close()

        validate_boro_df(self, df)

    def test_dataframe_to_geodataframe(self):
        df = pd.DataFrame({"A": range(len(self.df)),
                           "location": list(self.df.geometry)},
                          index=self.df.index)
        gf = df.set_geometry('location', crs=self.df.crs)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        self.assertEqual(gf.geometry.name, 'location')
        self.assertNotIn('geometry', gf)

        gf2 = df.set_geometry('location', crs=self.df.crs, drop=True)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf2, GeoDataFrame)
        self.assertEqual(gf2.geometry.name, 'geometry')
        self.assertIn('geometry', gf2)
        self.assertNotIn('location', gf2)
        self.assertIn('location', df)

        # should be a copy
        # (.loc replaces .ix, which was removed in pandas 1.0)
        df.loc[0, "A"] = 100
        self.assertEqual(gf.loc[0, "A"], 0)
        self.assertEqual(gf2.loc[0, "A"], 0)

        with self.assertRaises(ValueError):
            df.set_geometry('location', inplace=True)

    def test_geodataframe_geointerface(self):
        self.assertEqual(self.df.__geo_interface__['type'],
                         'FeatureCollection')
        self.assertEqual(len(self.df.__geo_interface__['features']),
                         self.df.shape[0])

    def test_geodataframe_geojson_no_bbox(self):
        geo = self.df._to_geo(na="null", show_bbox=False)
        self.assertNotIn('bbox', geo.keys())
        for feature in geo['features']:
            self.assertNotIn('bbox', feature.keys())

    def test_geodataframe_geojson_bbox(self):
        geo = self.df._to_geo(na="null", show_bbox=True)
        self.assertIn('bbox', geo.keys())
        self.assertEqual(len(geo['bbox']), 4)
        self.assertIsInstance(geo['bbox'], tuple)
        for feature in geo['features']:
            self.assertIn('bbox', feature.keys())
class TestDataFrame(unittest.TestCase):
    """Tests for ``GeoDataFrame`` built from the ``nybb`` boroughs shapefile.

    ``self.df`` is the shapefile read through ``read_file``; ``self.df2`` is
    a small synthetic frame of ``Point(i, i)`` geometries.  The tests cover
    geometry-column handling, GeoJSON output, file and PostGIS round trips,
    and CRS transforms.
    """

    def setUp(self):
        N = 10

        nybb_filename = download_nybb()

        self.df = read_file('/nybb_14a_av/nybb.shp',
                            vfs='zip://' + nybb_filename)
        self.boros = self.df['BoroName']
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([{
            'geometry': Point(x, y),
            'value1': x + y,
            'value2': x * y
        } for x, y in zip(range(N), range(N))], crs=self.crs)
        # Create the scratch directory last: tearDown (which removes it) is
        # not run when setUp raises, so nothing that can fail follows it.
        self.tempdir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        self.assertIs(type(self.df2), GeoDataFrame)
        self.assertEqual(self.df2.crs, self.crs)

    def test_different_geo_colname(self):
        data = {
            "A": range(5),
            "B": range(-5, 0),
            "location": [Point(x, y) for x, y in zip(range(5), range(5))]
        }
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        locs = GeoSeries(data['location'], crs=self.crs)
        assert_geoseries_equal(df.geometry, locs)
        self.assertNotIn('geometry', df)
        self.assertEqual(df.geometry.name, 'location')
        # internal implementation detail
        self.assertEqual(df._geometry_column_name, 'location')

        geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(geom2, crs='dummy_crs')
        self.assertIn('geometry', df2)
        self.assertIn('location', df2)
        self.assertEqual(df2.crs, 'dummy_crs')
        self.assertEqual(df2.geometry.crs, 'dummy_crs')
        # reset so it outputs okay
        df2.crs = df.crs
        assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs))
        # for right now, non-geometry comes back as series
        assert_geoseries_equal(df2['location'], df['location'],
                               check_series_type=False, check_dtype=False)

    def test_geo_getitem(self):
        data = {
            "A": range(5),
            "B": range(-5, 0),
            "location": [Point(x, y) for x, y in zip(range(5), range(5))]
        }
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        self.assertIsInstance(df.geometry, GeoSeries)
        # A plain column that merely happens to be called 'geometry' must
        # not replace the active geometry column ('location').
        df['geometry'] = df["A"]
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertEqual(df.geometry[0], data['location'][0])
        # good if this changed in the future
        self.assertNotIsInstance(df['geometry'], GeoSeries)
        self.assertIsInstance(df['location'], GeoSeries)

        data["geometry"] = [
            Point(x + 1, y - 1) for x, y in zip(range(5), range(5))
        ]
        df = GeoDataFrame(data, crs=self.crs)
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertIsInstance(df['geometry'], GeoSeries)
        # good if this changed in the future
        self.assertNotIsInstance(df['location'], GeoSeries)

    def test_geometry_property(self):
        assert_geoseries_equal(self.df.geometry, self.df['geometry'],
                               check_dtype=True, check_index_type=True)

        df = self.df.copy()
        new_geom = [
            Point(x, y)
            for x, y in zip(range(len(self.df)), range(len(self.df)))
        ]
        df.geometry = new_geom

        new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, new_geom)
        assert_geoseries_equal(df['geometry'], new_geom)

        # new crs
        gs = GeoSeries(new_geom, crs="epsg:26018")
        df.geometry = gs
        self.assertEqual(df.crs, "epsg:26018")

    def test_geometry_property_errors(self):
        # Only the statement expected to fail lives inside each
        # ``assertRaises`` block, so an unrelated error raised while
        # preparing the frame cannot satisfy the assertion by accident.
        df = self.df.copy()
        del df['geometry']
        with self.assertRaises(AttributeError):
            df.geometry

        # list-like error
        df = self.df2.copy()
        with self.assertRaises(ValueError):
            df.geometry = 'value1'

        # list-like error
        df = self.df.copy()
        with self.assertRaises(ValueError):
            df.geometry = 'apple'

        # non-geometry error
        df = self.df.copy()
        with self.assertRaises(TypeError):
            df.geometry = list(range(df.shape[0]))

        df = self.df.copy()
        del df['geometry']
        with self.assertRaises(KeyError):
            df['geometry']

        # ndim error
        df = self.df.copy()
        with self.assertRaises(ValueError):
            df.geometry = df

    def test_set_geometry(self):
        geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        original_geom = self.df.geometry

        df2 = self.df.set_geometry(geom)

        self.assertIsNot(self.df, df2)
        assert_geoseries_equal(df2.geometry, geom)
        assert_geoseries_equal(self.df.geometry, original_geom)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)
        # unknown column
        with self.assertRaises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with self.assertRaises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        new_df = self.df.set_geometry(gs)
        self.assertEqual(new_df.crs, "epsg:26018")

        # explicit crs overrides self and dataframe
        new_df = self.df.set_geometry(gs, crs="epsg:27159")
        self.assertEqual(new_df.crs, "epsg:27159")
        self.assertEqual(new_df.geometry.crs, "epsg:27159")

        # Series should use dataframe's
        new_df = self.df.set_geometry(geom.values)
        self.assertEqual(new_df.crs, self.df.crs)
        self.assertEqual(new_df.geometry.crs, self.df.crs)

    def test_set_geometry_col(self):
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry')

        # Drop is false by default
        self.assertIn('simplified_geometry', df2)
        assert_geoseries_equal(df2.geometry, g_simplified)

        # If True, drops column and renames to geometry
        df3 = self.df.set_geometry('simplified_geometry', drop=True)
        self.assertNotIn('simplified_geometry', df3)
        assert_geoseries_equal(df3.geometry, g_simplified)

    def test_set_geometry_inplace(self):
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        self.assertIsNone(ret)
        geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, geom)

    def test_set_geometry_series(self):
        # Test when setting geometry with a Series that
        # alignment will occur
        #
        # Reverse the index order
        # Set the Series to be Point(i,i) where i is the index
        self.df.index = range(len(self.df) - 1, -1, -1)

        d = {i: Point(i, i) for i in range(len(self.df))}
        g = GeoSeries(d)
        # At this point, the DataFrame index is [4,3,2,1,0] and the
        # GeoSeries index is [0,1,2,3,4]. Make sure set_geometry aligns
        # them to match indexes
        df = self.df.set_geometry(g)

        for i, r in df.iterrows():
            self.assertAlmostEqual(i, r['geometry'].x)
            self.assertAlmostEqual(i, r['geometry'].y)

    def test_to_json(self):
        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(data['type'], 'FeatureCollection')
        self.assertEqual(len(data['features']), 5)

    def test_to_json_geom_col(self):
        df = self.df.copy()
        df['geom'] = df['geometry']
        df['geometry'] = np.arange(len(df))
        df.set_geometry('geom', inplace=True)

        text = df.to_json()
        data = json.loads(text)
        self.assertEqual(data['type'], 'FeatureCollection')
        self.assertEqual(len(data['features']), 5)

    def test_to_json_na(self):
        # Set a value as nan and make sure it's written
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan

        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertIsNone(props['Shape_Area'])

    def test_to_json_bad_na(self):
        # Check that a bad na argument raises error
        with self.assertRaises(ValueError):
            self.df.to_json(na='garbage')

    def test_to_json_dropna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='drop')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            if props['BoroName'] == 'Queens':
                self.assertEqual(len(props), 3)
                self.assertNotIn('Shape_Area', props)
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertIn('Shape_Leng', props)
            elif props['BoroName'] == 'Bronx':
                self.assertEqual(len(props), 3)
                self.assertNotIn('Shape_Leng', props)
                self.assertIn('Shape_Area', props)
            else:
                self.assertEqual(len(props), 4)

    def test_to_json_keepna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='keep')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertTrue(np.isnan(props['Shape_Area']))
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertIn('Shape_Leng', props)
            elif props['BoroName'] == 'Bronx':
                self.assertTrue(np.isnan(props['Shape_Leng']))
                self.assertIn('Shape_Area', props)

    def test_copy(self):
        df2 = self.df.copy()
        self.assertIs(type(df2), GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in
        df = GeoDataFrame.from_file(tempfilename)
        self.assertIn('geometry', df)
        self.assertEqual(len(df), 5)
        # np.all replaces np.alltrue, which was removed in NumPy 2.0.
        self.assertTrue(np.all(df['BoroName'].values == self.boros))

    def test_to_file_types(self):
        """ Test various integer type columns (GH#93) """
        tempfilename = os.path.join(self.tempdir, 'int.shp')
        # ``np.int`` and ``np.long`` were removed in NumPy 1.24; ``int``
        # and ``np.int_`` select the default integer dtype without
        # relying on those aliases.
        int_types = [
            int, np.int8, np.int16, np.int32, np.int64, np.intp, np.uint8,
            np.uint16, np.uint32, np.uint64, np.int_
        ]
        geometry = self.df2.geometry
        data = dict((str(i), np.arange(len(geometry), dtype=dtype))
                    for i, dtype in enumerate(int_types))
        df = GeoDataFrame(data, geometry=geometry)
        df.to_file(tempfilename)

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        tempfilename = os.path.join(self.tempdir, 'test.shp')
        s = GeoDataFrame(
            {'geometry': [Point(0, 0), Polygon([(0, 0), (1, 0), (1, 1)])]})
        with self.assertRaises(ValueError):
            s.to_file(tempfilename)

    def test_bool_index(self):
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        self.assertEqual(len(df), 2)
        boros = df['BoroName'].values
        self.assertIn('Brooklyn', boros)
        self.assertIn('Bronx', boros)
        self.assertIs(type(df), GeoDataFrame)

    def test_transform(self):
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        # Round trip through lon/lat and back; coordinates must agree to
        # two decimals.
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        self.assertTrue(
            all(df2['geometry'].geom_almost_equals(utm['geometry'],
                                                   decimal=2)))

    def test_from_features(self):
        nybb_filename = download_nybb()
        with fiona.open('/nybb_14a_av/nybb.shp',
                        vfs='zip://' + nybb_filename) as f:
            features = list(f)
            crs = f.crs

        df = GeoDataFrame.from_features(features, crs=crs)
        df.rename(columns=lambda x: x.lower(), inplace=True)
        validate_boro_df(self, df)
        self.assertEqual(df.crs, crs)

    def test_from_features_unaligned_properties(self):
        p1 = Point(1, 1)
        f1 = {
            'type': 'Feature',
            'properties': {'a': 0},
            'geometry': p1.__geo_interface__
        }

        p2 = Point(2, 2)
        f2 = {
            'type': 'Feature',
            'properties': {'b': 1},
            'geometry': p2.__geo_interface__
        }

        p3 = Point(3, 3)
        f3 = {
            'type': 'Feature',
            'properties': {'a': 2},
            'geometry': p3.__geo_interface__
        }

        df = GeoDataFrame.from_features([f1, f2, f3])

        result = df[['a', 'b']]
        expected = pd.DataFrame.from_dict([
            {'a': 0, 'b': np.nan},
            {'a': np.nan, 'b': 1},
            {'a': 2, 'b': np.nan},
        ])
        assert_frame_equal(expected, result)

    def test_from_postgis_default(self):
        con = connect('test_geopandas')
        if con is None or not create_db(self.df):
            raise unittest.case.SkipTest()

        try:
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            if PANDAS_NEW_SQL_API:
                # It's not really a connection, it's an engine
                con = con.connect()
            con.close()

        validate_boro_df(self, df)

    def test_from_postgis_custom_geom_col(self):
        con = connect('test_geopandas')
        if con is None or not create_db(self.df):
            raise unittest.case.SkipTest()

        try:
            sql = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
            df = GeoDataFrame.from_postgis(sql, con, geom_col='__geometry__')
        finally:
            if PANDAS_NEW_SQL_API:
                # It's not really a connection, it's an engine
                con = con.connect()
            con.close()

        validate_boro_df(self, df)

    def test_dataframe_to_geodataframe(self):
        df = pd.DataFrame(
            {
                "A": range(len(self.df)),
                "location": list(self.df.geometry)
            },
            index=self.df.index)
        gf = df.set_geometry('location', crs=self.df.crs)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        self.assertEqual(gf.geometry.name, 'location')
        self.assertNotIn('geometry', gf)

        gf2 = df.set_geometry('location', crs=self.df.crs, drop=True)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf2, GeoDataFrame)
        self.assertEqual(gf2.geometry.name, 'geometry')
        self.assertIn('geometry', gf2)
        self.assertNotIn('location', gf2)
        self.assertIn('location', df)

        # should be a copy
        # (.loc replaces .ix, which was removed in pandas 1.0)
        df.loc[0, "A"] = 100
        self.assertEqual(gf.loc[0, "A"], 0)
        self.assertEqual(gf2.loc[0, "A"], 0)

        with self.assertRaises(ValueError):
            df.set_geometry('location', inplace=True)

    def test_geodataframe_geointerface(self):
        self.assertEqual(self.df.__geo_interface__['type'],
                         'FeatureCollection')
        self.assertEqual(len(self.df.__geo_interface__['features']),
                         self.df.shape[0])

    def test_geodataframe_geojson_no_bbox(self):
        geo = self.df._to_geo(na="null", show_bbox=False)
        self.assertNotIn('bbox', geo.keys())
        for feature in geo['features']:
            self.assertNotIn('bbox', feature.keys())

    def test_geodataframe_geojson_bbox(self):
        geo = self.df._to_geo(na="null", show_bbox=True)
        self.assertIn('bbox', geo.keys())
        self.assertEqual(len(geo['bbox']), 4)
        self.assertIsInstance(geo['bbox'], tuple)
        for feature in geo['features']:
            self.assertIn('bbox', feature.keys())
class TestDataFrame(unittest.TestCase):
    """Tests for GeoDataFrame construction, geometry handling and I/O.

    ``self.df`` is the NYC borough boundaries layer (five rows) and
    ``self.df2`` is a small synthetic frame of points on the diagonal.
    """

    def setUp(self):
        N = 10

        nybb_filename = download_nybb()

        self.df = read_file('/nybb_13a/nybb.shp',
                            vfs='zip://' + nybb_filename)
        self.tempdir = tempfile.mkdtemp()
        self.boros = np.array(['Staten Island', 'Queens', 'Brooklyn',
                               'Manhattan', 'Bronx'])
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)

    def tearDown(self):
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        self.assertTrue(type(self.df2) is GeoDataFrame)
        self.assertTrue(self.df2.crs == self.crs)

    def test_different_geo_colname(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y) for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        locs = GeoSeries(data['location'], crs=self.crs)
        assert_geoseries_equal(df.geometry, locs)
        self.assertNotIn('geometry', df)
        self.assertEqual(df.geometry.name, 'location')
        # internal implementation detail
        self.assertEqual(df._geometry_column_name, 'location')

        geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(geom2, crs='dummy_crs')
        self.assertIn('geometry', df2)
        self.assertIn('location', df2)
        self.assertEqual(df2.crs, 'dummy_crs')
        self.assertEqual(df2.geometry.crs, 'dummy_crs')
        # reset so it outputs okay
        df2.crs = df.crs
        assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs))
        # for right now, non-geometry comes back as series
        assert_geoseries_equal(df2['location'], df['location'],
                               check_series_type=False, check_dtype=False)

    def test_geo_getitem(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y) for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        self.assertIsInstance(df.geometry, GeoSeries)
        df['geometry'] = df["A"]
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertEqual(df.geometry[0], data['location'][0])
        # good if this changed in the future
        self.assertNotIsInstance(df['geometry'], GeoSeries)
        self.assertIsInstance(df['location'], GeoSeries)

        data["geometry"] = [Point(x + 1, y - 1)
                            for x, y in zip(range(5), range(5))]
        df = GeoDataFrame(data, crs=self.crs)
        self.assertIsInstance(df.geometry, GeoSeries)
        self.assertIsInstance(df['geometry'], GeoSeries)
        # good if this changed in the future
        self.assertNotIsInstance(df['location'], GeoSeries)

    def test_geometry_property(self):
        assert_geoseries_equal(self.df.geometry, self.df['geometry'],
                               check_dtype=True, check_index_type=True)

        df = self.df.copy()
        new_geom = [Point(x, y) for x, y in zip(range(len(self.df)),
                                                range(len(self.df)))]
        df.geometry = new_geom

        new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, new_geom)
        assert_geoseries_equal(df['geometry'], new_geom)

        # new crs
        gs = GeoSeries(new_geom, crs="epsg:26018")
        df.geometry = gs
        self.assertEqual(df.crs, "epsg:26018")

    def test_geometry_property_errors(self):
        with self.assertRaises(AttributeError):
            df = self.df.copy()
            del df['geometry']
            df.geometry

        # list-like error
        with self.assertRaises(ValueError):
            df = self.df2.copy()
            df.geometry = 'value1'

        # list-like error
        with self.assertRaises(ValueError):
            df = self.df.copy()
            df.geometry = 'apple'

        # non-geometry error
        with self.assertRaises(TypeError):
            df = self.df.copy()
            df.geometry = range(df.shape[0])

        with self.assertRaises(KeyError):
            df = self.df.copy()
            del df['geometry']
            df['geometry']

        # ndim error
        with self.assertRaises(ValueError):
            df = self.df.copy()
            df.geometry = df

    def test_set_geometry(self):
        geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        original_geom = self.df.geometry

        df2 = self.df.set_geometry(geom)
        self.assertIsNot(self.df, df2)
        assert_geoseries_equal(df2.geometry, geom)
        assert_geoseries_equal(self.df.geometry, original_geom)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)
        # unknown column
        with self.assertRaises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with self.assertRaises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        new_df = self.df.set_geometry(gs)
        self.assertEqual(new_df.crs, "epsg:26018")

        # explicit crs overrides self and dataframe
        new_df = self.df.set_geometry(gs, crs="epsg:27159")
        self.assertEqual(new_df.crs, "epsg:27159")
        self.assertEqual(new_df.geometry.crs, "epsg:27159")

        # Series should use dataframe's
        new_df = self.df.set_geometry(geom.values)
        self.assertEqual(new_df.crs, self.df.crs)
        self.assertEqual(new_df.geometry.crs, self.df.crs)

    def test_set_geometry_col(self):
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry')

        # Drop is false by default
        self.assertIn('simplified_geometry', df2)
        assert_geoseries_equal(df2.geometry, g_simplified)

        # If True, drops column and renames to geometry
        df3 = self.df.set_geometry('simplified_geometry', drop=True)
        self.assertNotIn('simplified_geometry', df3)
        assert_geoseries_equal(df3.geometry, g_simplified)

    def test_set_geometry_inplace(self):
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        self.assertIsNone(ret)
        geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, geom)

    def test_to_json(self):
        text = self.df.to_json()
        data = json.loads(text)
        self.assertTrue(data['type'] == 'FeatureCollection')
        self.assertTrue(len(data['features']) == 5)

    def test_to_json_na(self):
        # Set a value as nan and make sure it's written.
        # A single .loc assignment is used instead of chained indexing,
        # which may write to a temporary copy and leave self.df untouched.
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan

        text = self.df.to_json()
        data = json.loads(text)
        self.assertTrue(len(data['features']) == 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertTrue(props['Shape_Area'] is None)

    def test_to_json_dropna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='drop')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            if props['BoroName'] == 'Queens':
                self.assertEqual(len(props), 3)
                self.assertTrue('Shape_Area' not in props)
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertTrue('Shape_Leng' in props)
            elif props['BoroName'] == 'Bronx':
                self.assertEqual(len(props), 3)
                self.assertTrue('Shape_Leng' not in props)
                self.assertTrue('Shape_Area' in props)
            else:
                self.assertEqual(len(props), 4)

    def test_to_json_keepna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='keep')
        data = json.loads(text)
        self.assertEqual(len(data['features']), 5)
        for f in data['features']:
            props = f['properties']
            self.assertEqual(len(props), 4)
            if props['BoroName'] == 'Queens':
                self.assertTrue(np.isnan(props['Shape_Area']))
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                self.assertTrue('Shape_Leng' in props)
            elif props['BoroName'] == 'Bronx':
                self.assertTrue(np.isnan(props['Shape_Leng']))
                self.assertTrue('Shape_Area' in props)

    def test_copy(self):
        df2 = self.df.copy()
        self.assertTrue(type(df2) is GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in?
        df = GeoDataFrame.from_file(tempfilename)
        self.assertTrue('geometry' in df)
        self.assertTrue(len(df) == 5)
        # np.all replaces the np.alltrue alias, which newer NumPy dropped.
        self.assertTrue(np.all(df['BoroName'].values == self.boros))

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        tempfilename = os.path.join(self.tempdir, 'test.shp')
        s = GeoDataFrame({'geometry': [Point(0, 0),
                                       Polygon([(0, 0), (1, 0), (1, 1)])]})
        with self.assertRaises(ValueError):
            s.to_file(tempfilename)

    def test_bool_index(self):
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        self.assertTrue(len(df) == 2)
        boros = df['BoroName'].values
        self.assertTrue('Brooklyn' in boros)
        self.assertTrue('Bronx' in boros)
        self.assertTrue(type(df) is GeoDataFrame)

    def test_transform(self):
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        self.assertTrue(all(df2['geometry'].almost_equals(utm['geometry'],
                                                          decimal=2)))

    def test_from_postgis_default(self):
        con = connect('test_geopandas')
        if con is None or not create_db(self.df):
            raise unittest.case.SkipTest()

        try:
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            con.close()

        validate_boro_df(self, df)

    def test_from_postgis_custom_geom_col(self):
        con = connect('test_geopandas')
        if con is None or not create_db(self.df):
            raise unittest.case.SkipTest()

        try:
            sql = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
            df = GeoDataFrame.from_postgis(sql, con, geom_col='__geometry__')
        finally:
            con.close()

        validate_boro_df(self, df)

    def test_dataframe_to_geodataframe(self):
        df = pd.DataFrame({"A": range(len(self.df)),
                           "location": list(self.df.geometry)},
                          index=self.df.index)
        gf = df.set_geometry('location', crs=self.df.crs)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        self.assertEqual(gf.geometry.name, 'location')
        self.assertNotIn('geometry', gf)

        gf2 = df.set_geometry('location', crs=self.df.crs, drop=True)
        self.assertIsInstance(df, pd.DataFrame)
        self.assertIsInstance(gf2, GeoDataFrame)
        self.assertEqual(gf2.geometry.name, 'geometry')
        self.assertIn('geometry', gf2)
        self.assertNotIn('location', gf2)
        self.assertIn('location', df)

        # should be a copy
        # (.loc is the label-based replacement for the removed .ix indexer)
        df.loc[0, "A"] = 100
        self.assertEqual(gf.loc[0, "A"], 0)
        self.assertEqual(gf2.loc[0, "A"], 0)

        with self.assertRaises(ValueError):
            df.set_geometry('location', inplace=True)
class TestDataFrame(unittest.TestCase):
    """Basic GeoDataFrame tests against the NYC borough boundaries layer."""

    def setUp(self):
        N = 10

        nybb_filename = tests.util.download_nybb()

        self.df = read_file('/nybb_13a/nybb.shp',
                            vfs='zip://' + nybb_filename)
        self.tempdir = tempfile.mkdtemp()
        self.boros = np.array(['Staten Island', 'Queens', 'Brooklyn',
                               'Manhattan', 'Bronx'])
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)

    def tearDown(self):
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        self.assertTrue(type(self.df2) is GeoDataFrame)
        self.assertTrue(self.df2.crs == self.crs)

    def test_set_geometry(self):
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        df2 = self.df.set_geometry(geom)
        self.assertIsNot(self.df, df2)
        # zip() stops at the shorter input, so pin the length first;
        # otherwise a truncated result would pass the loop vacuously.
        self.assertEqual(len(df2.geometry), len(geom))
        for x, y in zip(df2.geometry.values, geom):
            self.assertEqual(x, y)

    def test_set_geometry_col(self):
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry')

        # Drop is true by default
        self.assertNotIn('simplified_geometry', df2)
        self.assertEqual(len(df2.geometry), len(g_simplified))
        for x, y in zip(df2.geometry.values, g_simplified):
            self.assertEqual(x, y)

    def test_set_geometry_col_no_drop(self):
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry', drop=False)

        self.assertIn('simplified_geometry', df2)
        self.assertEqual(len(df2.geometry), len(g_simplified))
        for x, y in zip(df2.geometry.values, g_simplified):
            self.assertEqual(x, y)

    def test_set_geometry_inplace(self):
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        self.assertIsNone(ret)
        self.assertEqual(len(self.df['geometry']), len(geom))
        for x, y in zip(self.df['geometry'].values, geom):
            self.assertEqual(x, y)

    def test_to_json(self):
        text = self.df.to_json()
        data = json.loads(text)
        self.assertTrue(data['type'] == 'FeatureCollection')
        self.assertTrue(len(data['features']) == 5)

    def test_copy(self):
        df2 = self.df.copy()
        self.assertTrue(type(df2) is GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in?
        df = GeoDataFrame.from_file(tempfilename)
        self.assertTrue('geometry' in df)
        self.assertTrue(len(df) == 5)
        # np.all replaces the np.alltrue alias, which newer NumPy dropped.
        self.assertTrue(np.all(df['BoroName'].values == self.boros))

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        tempfilename = os.path.join(self.tempdir, 'test.shp')
        s = GeoDataFrame({'geometry': [Point(0, 0),
                                       Polygon([(0, 0), (1, 0), (1, 1)])]})
        with self.assertRaises(ValueError):
            s.to_file(tempfilename)

    def test_bool_index(self):
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        self.assertTrue(len(df) == 2)
        boros = df['BoroName'].values
        self.assertTrue('Brooklyn' in boros)
        self.assertTrue('Bronx' in boros)
        self.assertTrue(type(df) is GeoDataFrame)

    def test_transform(self):
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        self.assertTrue(all(df2['geometry'].almost_equals(utm['geometry'],
                                                          decimal=2)))

    def test_from_postgis_default(self):
        con = tests.util.connect('test_geopandas')
        if con is None or not tests.util.create_db(self.df):
            raise unittest.case.SkipTest()

        try:
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            con.close()

        tests.util.validate_boro_df(self, df)

    def test_from_postgis_custom_geom_col(self):
        con = tests.util.connect('test_geopandas')
        if con is None or not tests.util.create_db(self.df):
            raise unittest.case.SkipTest()

        try:
            sql = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
            df = GeoDataFrame.from_postgis(sql, con, geom_col='__geometry__')
        finally:
            con.close()

        tests.util.validate_boro_df(self, df)
class TestTrajectoryCollection:
    """Tests for TrajectoryCollection built from a two-trajectory point frame.

    Trajectory 1 has four points on 2018-01-01; trajectory 2 has four
    points spanning 2018-01-01 to 2018-01-02.
    """

    def setup_method(self):
        df = pd.DataFrame(
            [
                [1, "A", Point(0, 0), datetime(2018, 1, 1, 12, 0, 0), 9, "a"],
                [1, "A", Point(6, 0), datetime(2018, 1, 1, 12, 6, 0), 5, "b"],
                [1, "A", Point(6, 6), datetime(2018, 1, 1, 14, 10, 0), 2, "c"],
                [1, "A", Point(9, 9), datetime(2018, 1, 1, 14, 15, 0), 4, "d"],
                [2, "A", Point(10, 10), datetime(2018, 1, 1, 12, 0, 0), 10, "e"],
                [2, "A", Point(16, 10), datetime(2018, 1, 1, 12, 6, 0), 6, "f"],
                [2, "A", Point(16, 16), datetime(2018, 1, 2, 13, 10, 0), 7, "g"],
                [2, "A", Point(190, 10), datetime(2018, 1, 2, 13, 15, 0), 3, "h"],
            ],
            columns=["id", "obj", "geometry", "t", "val", "val2"],
        ).set_index("t")
        self.geo_df = GeoDataFrame(df, crs=CRS_METRIC)
        self.collection = TrajectoryCollection(
            self.geo_df, "id", obj_id_col="obj")
        self.geo_df_latlon = GeoDataFrame(df, crs=CRS_LATLON)
        self.collection_latlon = TrajectoryCollection(
            self.geo_df_latlon, "id", obj_id_col="obj")

    @staticmethod
    def _export_layer(gdf, layer):
        """Write ``gdf`` as a GeoPackage layer inside a throwaway directory.

        This keeps the export smoke test without leaving ``temp.gpkg``
        behind in the current working directory.
        """
        import os
        import tempfile

        with tempfile.TemporaryDirectory() as tmp_dir:
            gdf.to_file(os.path.join(tmp_dir, "temp.gpkg"),
                        layer=layer, driver="GPKG")

    def test_number_of_trajectories(self):
        assert len(self.collection) == 2

    def test_number_of_trajectories_min_length(self):
        collection = TrajectoryCollection(
            self.geo_df, "id", obj_id_col="obj", min_length=100)
        assert len(collection) == 1

    def test_number_of_trajectories_min_length_never_reached(self):
        collection = TrajectoryCollection(
            self.geo_df, "id", obj_id_col="obj", min_length=1000)
        assert len(collection) == 0

    def test_number_of_trajectories_min_duration(self):
        collection = TrajectoryCollection(
            self.geo_df, "id", obj_id_col="obj",
            min_duration=timedelta(days=1))
        assert len(collection) == 1

    def test_number_of_trajectories_min_duration_from_list(self):
        collection = TrajectoryCollection(
            self.collection.trajectories, min_duration=timedelta(days=1))
        assert len(collection) == 1

    def test_number_of_trajectories_min_duration_never_reached(self):
        collection = TrajectoryCollection(
            self.geo_df, "id", obj_id_col="obj",
            min_duration=timedelta(weeks=1))
        assert len(collection) == 0

    def test_get_trajectory(self):
        assert self.collection.get_trajectory(1).id == 1
        assert self.collection.get_trajectory(1).obj_id == "A"
        assert self.collection.get_trajectory(2).id == 2
        assert self.collection.get_trajectory(3) is None

    def test_get_locations_at(self):
        locs = self.collection.get_locations_at(datetime(2018, 1, 1, 12, 6, 0))
        assert len(locs) == 2
        assert locs.iloc[0].geometry in [Point(6, 0), Point(16, 10)]
        assert locs.iloc[0].val in [5, 6]
        assert locs.iloc[1].geometry in [Point(6, 0), Point(16, 10)]
        assert locs.iloc[0].geometry != locs.iloc[1].geometry

    def test_get_locations_at_needing_interpolation(self):
        locs = self.collection.get_locations_at(datetime(2018, 1, 1, 12, 6, 1))
        assert len(locs) == 2
        assert locs.iloc[0].val in [5, 6]

    def test_get_locations_at_out_of_time_range(self):
        locs = self.collection.get_locations_at(datetime(2017, 1, 1, 12, 6, 1))
        assert len(locs) == 0

    def test_get_start_locations(self):
        locs = self.collection.get_start_locations()
        assert len(locs) == 2
        assert locs.iloc[0].geometry in [Point(0, 0), Point(10, 10)]
        assert locs.iloc[0].id in [1, 2]
        assert locs.iloc[0].obj == "A"
        assert locs.iloc[0].val in [9, 10]
        assert locs.iloc[0].val2 in ["a", "e"]
        assert locs.iloc[1].geometry in [Point(0, 0), Point(10, 10)]
        assert locs.iloc[0].geometry != locs.iloc[1].geometry
        assert isinstance(locs, GeoDataFrame)

    def test_get_end_locations(self):
        locs = self.collection.get_end_locations()
        assert len(locs) == 2
        assert locs.iloc[0].geometry in [Point(9, 9), Point(190, 10)]
        assert locs.iloc[0].id in [1, 2]
        assert locs.iloc[0].obj == "A"
        assert locs.iloc[0].val in [4, 3]
        assert locs.iloc[0].val2 in ["d", "h"]
        assert locs.iloc[1].geometry in [Point(9, 9), Point(190, 10)]
        assert locs.iloc[0].geometry != locs.iloc[1].geometry
        assert isinstance(locs, GeoDataFrame)

    def test_get_intersecting(self):
        polygon = Polygon([(-1, -1), (-1, 1), (1, 1), (1, -1), (-1, -1)])
        collection = self.collection.get_intersecting(polygon)
        assert len(collection) == 1
        assert collection.trajectories[0] == self.collection.trajectories[0]

    def test_clip(self):
        polygon = Polygon([(-1, -1), (-1, 1), (1, 1), (1, -1), (-1, -1)])
        collection = self.collection.clip(polygon)
        assert len(collection) == 1
        assert (collection.trajectories[0].to_linestring().wkt
                == "LINESTRING (0 0, 1 0)")

    def test_filter(self):
        assert len(self.collection.filter("obj", "A")) == 2
        assert len(self.collection.filter("obj", ["A"])) == 2
        assert len(self.collection.filter("obj", ["B"])) == 0
        assert len(self.collection.filter("obj", [1])) == 0

    def test_get_min_and_max(self):
        assert self.collection.get_min("val") == 2
        assert self.collection.get_max("val") == 10

    def test_plot_exists(self):
        from matplotlib.axes import Axes

        result = self.collection.plot()
        assert isinstance(result, Axes)

    def test_hvplot_exists(self):
        import holoviews

        result = self.collection_latlon.hvplot()
        assert isinstance(result, holoviews.core.overlay.Overlay)

    def test_plot_exist_column(self):
        from matplotlib.axes import Axes

        result = self.collection.plot(column="val")
        assert isinstance(result, Axes)

    def test_plot_speed_not_altering_collection(self):
        self.collection.plot(column="speed")
        assert all(
            "speed" not in traj.df.columns.values
            for traj in self.collection.trajectories
        )

    def test_traj_with_less_than_two_points(self):
        df = pd.DataFrame(
            [[1, "A", Point(0, 0), datetime(2018, 1, 1, 12, 0, 0), 9, "a"]],
            columns=["id", "obj", "geometry", "t", "val", "val2"],
        ).set_index("t")
        geo_df = GeoDataFrame(df, crs=CRS_METRIC)
        tc = TrajectoryCollection(geo_df, "id", obj_id_col="obj")
        assert len(tc) == 0

    def test_traj_with_two_points_at_the_same_time(self):
        df = pd.DataFrame(
            [
                [1, "A", Point(0, 0), datetime(2018, 1, 1, 12, 0, 0), 9, "a"],
                [1, "A", Point(0, 0), datetime(2018, 1, 1, 12, 0, 0), 9, "a"],
            ],
            columns=["id", "obj", "geometry", "t", "val", "val2"],
        ).set_index("t")
        geo_df = GeoDataFrame(df, crs=CRS_METRIC)
        tc = TrajectoryCollection(geo_df, "id", obj_id_col="obj")
        assert len(tc) == 0

    def test_iteration(self):
        assert sum(1 for _ in self.collection) == len(self.collection)

    def test_iteration_error(self):
        def filter_trajectory(trajectory):
            trajectory.df = trajectory.df[trajectory.df["val"] >= 7]
            return trajectory

        trajs = [filter_trajectory(traj) for traj in self.collection]

        lengths = (1, 2)
        for i, traj in enumerate(trajs):
            assert len(traj.df) == lengths[i]

        collection = copy(self.collection)
        collection.trajectories = trajs
        with pytest.raises(ValueError):
            for _ in collection:
                pass

    def test_add_traj_id(self):
        self.collection.add_traj_id()
        result1 = self.collection.trajectories[0].df[TRAJ_ID_COL_NAME].tolist()
        assert result1 == [1, 1, 1, 1]
        result2 = self.collection.trajectories[1].df[TRAJ_ID_COL_NAME].tolist()
        assert result2 == [2, 2, 2, 2]

    def test_add_traj_id_overwrite_raises_error(self):
        gdf = self.geo_df.copy()
        gdf[TRAJ_ID_COL_NAME] = "a"
        collection = TrajectoryCollection(gdf, "id", obj_id_col="obj")
        with pytest.raises(RuntimeError):
            collection.add_traj_id()

    def test_to_point_gdf(self):
        point_gdf = self.collection.to_point_gdf()
        self._export_layer(point_gdf, "points")
        assert_frame_equal(point_gdf, self.geo_df)

    def test_to_line_gdf(self):
        temp_df = self.geo_df.drop(columns=["obj", "val", "val2"])
        tc = TrajectoryCollection(temp_df, "id")
        line_gdf = tc.to_line_gdf()
        self._export_layer(line_gdf, "lines")
        t1 = [
            datetime(2018, 1, 1, 12, 0),
            datetime(2018, 1, 1, 12, 6),
            datetime(2018, 1, 1, 14, 10),
            datetime(2018, 1, 1, 14, 15),
        ]
        t2 = [
            datetime(2018, 1, 1, 12, 0, 0),
            datetime(2018, 1, 1, 12, 6, 0),
            datetime(2018, 1, 2, 13, 10, 0),
            datetime(2018, 1, 2, 13, 15, 0),
        ]
        df2 = pd.DataFrame(
            [
                [1, t1[1], t1[0], LineString([(0, 0), (6, 0)])],
                [1, t1[2], t1[1], LineString([(6, 0), (6, 6)])],
                [1, t1[3], t1[2], LineString([(6, 6), (9, 9)])],
                [2, t2[1], t2[0], LineString([(10, 10), (16, 10)])],
                [2, t2[2], t2[1], LineString([(16, 10), (16, 16)])],
                [2, t2[3], t2[2], LineString([(16, 16), (190, 10)])],
            ],
            columns=["id", "t", "prev_t", "geometry"],
        )
        expected_line_gdf = GeoDataFrame(df2, crs=CRS_METRIC)

        assert_frame_equal(line_gdf, expected_line_gdf)

    def test_to_traj_gdf(self):
        temp_df = self.geo_df.drop(columns=["obj", "val", "val2"])
        tc = TrajectoryCollection(temp_df, "id")
        traj_gdf = tc.to_traj_gdf()
        self._export_layer(traj_gdf, "trajs")

        rows = [
            {
                "traj_id": 1,
                "start_t": datetime(2018, 1, 1, 12, 0, 0),
                "end_t": datetime(2018, 1, 1, 14, 15, 0),
                "geometry": LineString([(0, 0), (6, 0), (6, 6), (9, 9)]),
                "length": 12 + sqrt(18),
                "direction": 45.0,
            },
            {
                "traj_id": 2,
                "start_t": datetime(2018, 1, 1, 12, 0, 0),
                "end_t": datetime(2018, 1, 2, 13, 15, 0),
                "geometry": LineString(
                    [(10, 10), (16, 10), (16, 16), (190, 10)]),
                "length": 12 + sqrt(174 * 174 + 36),
                "direction": 90.0,
            },
        ]
        df2 = pd.DataFrame(rows)
        expected_line_gdf = GeoDataFrame(df2, crs=CRS_METRIC)

        assert_frame_equal(traj_gdf, expected_line_gdf)
def __post_proc_input_gdf(self, input_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Return a reprojected deep copy of ``input_gdf``.

    The caller's frame is copied first, so only the copy is handed to the
    reprojection step.
    """
    return self.__reprojection(input_gdf.copy(deep=True))
class TestOverlayNYBB:
    """Overlay tests between the NYC boroughs and a diagonal of buffers."""

    def setup_method(self):
        steps = 10

        self.polydf = read_file(geopandas.datasets.get_path('nybb'))
        self.crs = {'init': 'epsg:4326'}

        # Integer bounding box of the boroughs: [minx, miny, maxx, maxy].
        bounds = [int(v) for v in self.polydf.total_bounds]
        x_values = range(bounds[0], bounds[2],
                         int((bounds[2] - bounds[0]) / steps))
        y_values = range(bounds[1], bounds[3],
                         int((bounds[3] - bounds[1]) / steps))
        # Both test frames share the same diagonal of sample coordinates.
        diagonal = list(zip(x_values, y_values))

        buffered_rows = [
            {'geometry': Point(x, y).buffer(10000),
             'value1': x + y,
             'value2': x - y}
            for x, y in diagonal
        ]
        self.polydf2 = GeoDataFrame(buffered_rows, crs=self.crs)

        point_rows = [
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x - y}
            for x, y in diagonal
        ]
        self.pointdf = GeoDataFrame(point_rows, crs=self.crs)

        # TODO this appears to be necessary;
        # why is the sindex not generated automatically?
        self.polydf2._generate_sindex()

        self.union_shape = (180, 7)

    def test_union(self):
        result = overlay(self.polydf, self.polydf2, how="union")
        assert type(result) is GeoDataFrame
        assert result.shape == self.union_shape
        assert 'value1' in result.columns
        assert 'Shape_Area' in result.columns

    def test_union_no_index(self):
        # explicitly ignore indices
        without_sindex = overlay(self.polydf, self.polydf2, how="union",
                                 use_sindex=False)
        assert without_sindex.shape == self.union_shape

        # remove indices from df
        self.polydf._sindex = None
        self.polydf2._sindex = None
        cleared_sindex = overlay(self.polydf, self.polydf2, how="union")
        assert cleared_sindex.shape == self.union_shape

    def test_union_non_numeric_index(self):
        import string

        labels = list(string.ascii_letters)
        left = self.polydf.copy()
        right = self.polydf2.copy()
        left.index = labels[:len(left)]
        right.index = labels[:len(right)]

        result = overlay(left, right, how="union")
        assert type(result) is GeoDataFrame
        assert result.shape == self.union_shape
        assert 'value1' in result.columns
        assert 'Shape_Area' in result.columns

    def test_intersection(self):
        result = overlay(self.polydf, self.polydf2, how="intersection")
        assert result['BoroName'][0] is not None
        assert result.shape == (68, 7)

    def test_identity(self):
        result = overlay(self.polydf, self.polydf2, how="identity")
        assert result.shape == (154, 7)

    def test_symmetric_difference(self):
        result = overlay(self.polydf, self.polydf2,
                         how="symmetric_difference")
        assert result.shape == (122, 7)

    def test_difference(self):
        result = overlay(self.polydf, self.polydf2, how="difference")
        assert result.shape == (86, 7)

    def test_bad_how(self):
        with pytest.raises(ValueError):
            overlay(self.polydf, self.polydf, how="spandex")

    def test_nonpoly(self):
        with pytest.raises(TypeError):
            overlay(self.pointdf, self.polydf, how="union")

    def test_duplicate_column_name(self):
        renamed = self.polydf2.rename(columns={'value2': 'Shape_Area'})
        result = overlay(self.polydf, renamed, how="union")
        assert 'Shape_Area_2' in result.columns
        assert 'Shape_Area' in result.columns

    def test_geometry_not_named_geometry(self):
        # Issue #306
        # Add points and flip names
        flipped = (
            self.polydf.copy()
            .rename(columns={'geometry': 'polygons'})
            .set_geometry('polygons')
        )
        flipped['geometry'] = self.pointdf.geometry.loc[0:4]
        assert flipped.geometry.name == 'polygons'

        from_flipped = overlay(flipped, self.polydf2, how="union")
        assert type(from_flipped) is GeoDataFrame

        reference = overlay(self.polydf, self.polydf2, how="union")
        assert from_flipped.geom_almost_equals(reference).all()

    def test_geoseries_warning(self):
        # Issue #305
        with pytest.raises(NotImplementedError):
            overlay(self.polydf, self.polydf2.geometry, how="union")
class TestDataFrame(unittest.TestCase):
    """GeoDataFrame tests, including optional PostGIS round-trips.

    The PostGIS tests run only when ``setUp`` manages to populate the
    ``test_geopandas`` database; otherwise they are skipped.
    """

    def setUp(self):
        N = 10
        # Data from http://www.nyc.gov/html/dcp/download/bytes/nybb_13a.zip
        # saved as geopandas/examples/nybb_13a.zip.
        zip_path = os.path.join('examples', 'nybb_13a.zip')
        if not os.path.exists(zip_path):
            response = urllib2.urlopen(
                'http://www.nyc.gov/html/dcp/download/bytes/nybb_13a.zip')
            try:
                # Read everything before creating the file, so a failed
                # download does not leave an empty archive that later
                # runs would mistake for a cached copy.
                payload = response.read()
            finally:
                response.close()
            # The archive is binary data; text mode can corrupt it.
            with open(zip_path, 'wb') as f:
                f.write(payload)
        self.df = GeoDataFrame.from_file(
            '/nybb_13a/nybb.shp', vfs='zip://examples/nybb_13a.zip')
        self.tempdir = tempfile.mkdtemp()
        self.boros = np.array(['Staten Island', 'Queens', 'Brooklyn',
                               'Manhattan', 'Bronx'])
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)

        # Try to create the database, skip the db tests if something goes
        # wrong
        # If you'd like these tests to run, create a database called
        # 'test_geopandas' and enable postgis in it:
        # > createdb test_geopandas
        # > psql -c "CREATE EXTENSION postgis" -d test_geopandas
        try:
            self._create_db()
            self.run_db_test = True
        except (NameError, OperationalError):
            # NameError is thrown if psycopg2 fails to import at top of file
            # OperationalError is thrown if we can't connect to the database
            self.run_db_test = False

    def _create_db(self):
        """Recreate the ``nybb`` table and load one row per borough."""
        con = psycopg2.connect(dbname='test_geopandas')
        try:
            cursor = con.cursor()
            cursor.execute("DROP TABLE IF EXISTS nybb;")

            sql = """CREATE TABLE nybb (
                geom        geometry,
                borocode    integer,
                boroname    varchar(40),
                shape_leng  float,
                shape_area  float
            );"""
            cursor.execute(sql)

            for i, row in self.df.iterrows():
                sql = """INSERT INTO nybb VALUES (
                    ST_GeometryFromText(%s), %s, %s, %s, %s
                );"""
                cursor.execute(sql, (row['geometry'].wkt,
                                     row['BoroCode'],
                                     row['BoroName'],
                                     row['Shape_Leng'],
                                     row['Shape_Area']))

            cursor.close()
            con.commit()
        finally:
            # Release the connection even when a statement fails.
            con.close()

    def tearDown(self):
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        self.assertTrue(type(self.df2) is GeoDataFrame)
        self.assertTrue(self.df2.crs == self.crs)

    def test_to_json(self):
        text = self.df.to_json()
        data = json.loads(text)
        self.assertTrue(data['type'] == 'FeatureCollection')
        self.assertTrue(len(data['features']) == 5)

    def test_copy(self):
        df2 = self.df.copy()
        self.assertTrue(type(df2) is GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in?
        df = GeoDataFrame.from_file(tempfilename)
        self.assertTrue('geometry' in df)
        self.assertTrue(len(df) == 5)
        # np.all replaces the np.alltrue alias, which newer NumPy dropped.
        self.assertTrue(np.all(df['BoroName'].values == self.boros))

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        tempfilename = os.path.join(self.tempdir, 'test.shp')
        s = GeoDataFrame({'geometry': [Point(0, 0),
                                       Polygon([(0, 0), (1, 0), (1, 1)])]})
        with self.assertRaises(ValueError):
            s.to_file(tempfilename)

    def test_bool_index(self):
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        self.assertTrue(len(df) == 2)
        boros = df['BoroName'].values
        self.assertTrue('Brooklyn' in boros)
        self.assertTrue('Bronx' in boros)
        self.assertTrue(type(df) is GeoDataFrame)

    def test_transform(self):
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        self.assertTrue(all(df2['geometry'].almost_equals(utm['geometry'],
                                                          decimal=2)))

    def _validate_sql(self, df):
        # Make sure all the columns are there and the geometries
        # were properly loaded as MultiPolygons
        self.assertEqual(len(df), 5)
        columns = ('borocode', 'boroname', 'shape_leng', 'shape_area')
        for col in columns:
            self.assertTrue(col in df.columns, 'Column {} missing'.format(col))
        self.assertTrue(all(df['geometry'].type == 'MultiPolygon'))

    def test_from_postgis_default(self):
        if not self.run_db_test:
            raise unittest.case.SkipTest()

        # A psycopg2 connection used as a context manager only ends the
        # transaction; it does not close the connection, so close it here.
        con = psycopg2.connect(dbname='test_geopandas')
        try:
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            con.close()

        self._validate_sql(df)

    def test_from_postgis_custom_geom_col(self):
        if not self.run_db_test:
            raise unittest.case.SkipTest()

        con = psycopg2.connect(dbname='test_geopandas')
        try:
            sql = """SELECT
                     borocode, boroname, shape_leng, shape_area,
                     geom AS __geometry__
                     FROM nybb;"""
            df = GeoDataFrame.from_postgis(sql, con, geom_col='__geometry__')
        finally:
            con.close()

        self._validate_sql(df)
def dfs(request):
    """Build two polygon frames plus the expected join result per predicate.

    ``request.param`` selects the index flavour applied to both frames.
    Returns ``[request.param, df1, df2, expected]`` where ``expected`` maps
    "intersects", "contains" and "within" to the expected joined frame.
    """
    left_polys = GeoSeries(
        [
            Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
            Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]),
            Polygon([(6, 0), (9, 0), (9, 3), (6, 3)]),
        ]
    )
    right_polys = GeoSeries(
        [
            Polygon([(1, 1), (4, 1), (4, 4), (1, 4)]),
            Polygon([(4, 4), (7, 4), (7, 7), (4, 7)]),
            Polygon([(7, 7), (10, 7), (10, 10), (7, 10)]),
        ]
    )

    df1 = GeoDataFrame({"geometry": left_polys, "df1": [0, 1, 2]})
    df2 = GeoDataFrame({"geometry": right_polys, "df2": [3, 4, 5]})

    flavour = request.param
    if flavour == "string-index":
        df1.index = ["a", "b", "c"]
        df2.index = ["d", "e", "f"]

    if flavour == "named-index":
        df1.index.name = "df1_ix"
        df2.index.name = "df2_ix"

    if flavour == "multi-index":
        first_level = ["a", "b", "c"]
        second_level = ["d", "e", "f"]
        df1 = df1.set_index([first_level, second_level])
        df2 = df2.set_index([second_level, first_level])

    if flavour == "named-multi-index":
        first_level = ["a", "b", "c"]
        second_level = ["d", "e", "f"]
        df1 = df1.set_index([first_level, second_level])
        df2 = df2.set_index([second_level, first_level])
        df1.index.names = ["df1_ix1", "df1_ix2"]
        df2.index.names = ["df2_ix1", "df2_ix2"]

    def _expected_join(right_rows, left_keys, right_keys):
        # Pair rows through a throwaway "_merge" key; keys with no partner
        # on the other side yield unmatched rows in the outer merge.
        left_part = (
            df1.copy().reset_index().rename(columns={"index": "index_left"})
        )
        right_part = right_rows.reset_index().rename(
            columns={"index": "index_right"}
        )
        left_part["_merge"] = left_keys
        right_part["_merge"] = right_keys
        merged = pd.merge(left_part, right_part, on="_merge", how="outer")
        return merged.drop("_merge", axis=1).copy()

    # construction expected frames
    expected = {}
    expected["intersects"] = _expected_join(
        df2.copy().iloc[[0, 1, 1, 2]], [0, 1, 2], [0, 0, 1, 3]
    )
    expected["contains"] = _expected_join(df2.copy(), [0, 1, 2], [0, 3, 3])
    expected["within"] = _expected_join(df2.copy(), [0, 1, 2], [3, 1, 3])

    return [request.param, df1, df2, expected]
class TestDataFrame:
    """Tests for GeoDataFrame construction, geometry handling and I/O.

    Every test starts from the fixtures created in ``setup_method``:
    ``self.df`` (the nybb sample), ``self.df2`` (ten points on the
    diagonal) and ``self.df3`` (the ``null_geom.geojson`` example).
    """

    def setup_method(self):
        N = 10

        nybb_filename = geopandas.datasets.get_path('nybb')
        self.df = read_file(nybb_filename)
        self.tempdir = tempfile.mkdtemp()
        self.boros = self.df['BoroName']
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)
        self.df3 = read_file(
            os.path.join(PACKAGE_DIR, 'examples', 'null_geom.geojson'))
        self.line_paths = self.df3['Name']

    def teardown_method(self):
        shutil.rmtree(self.tempdir)

    def _read_postgis(self, sql, **kwargs):
        """Run ``sql`` through ``GeoDataFrame.from_postgis`` or skip.

        The test is skipped when no connection is returned or when
        ``create_db`` reports failure. The connection is closed on every
        path, including the skip raised after ``create_db`` fails.
        """
        con = connect('test_geopandas')
        if con is None:
            pytest.skip('no connection to the test_geopandas database')
        try:
            if not create_db(self.df):
                pytest.skip('test database could not be set up')
            return GeoDataFrame.from_postgis(sql, con, **kwargs)
        finally:
            con.close()

    def test_df_init(self):
        assert type(self.df2) is GeoDataFrame
        assert self.df2.crs == self.crs

    def test_different_geo_colname(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y)
                             for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        locs = GeoSeries(data['location'], crs=self.crs)
        assert_geoseries_equal(df.geometry, locs)
        assert 'geometry' not in df
        assert df.geometry.name == 'location'
        # internal implementation detail
        assert df._geometry_column_name == 'location'

        geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(geom2, crs='dummy_crs')
        assert 'location' in df2
        assert df2.crs == 'dummy_crs'
        assert df2.geometry.crs == 'dummy_crs'
        # reset so it outputs okay
        df2.crs = df.crs
        assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs))

    def test_geo_getitem(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y)
                             for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        assert isinstance(df.geometry, GeoSeries)
        df['geometry'] = df["A"]
        assert isinstance(df.geometry, GeoSeries)
        assert df.geometry[0] == data['location'][0]
        # good if this changed in the future
        assert not isinstance(df['geometry'], GeoSeries)
        assert isinstance(df['location'], GeoSeries)

        data["geometry"] = [Point(x + 1, y - 1)
                            for x, y in zip(range(5), range(5))]
        df = GeoDataFrame(data, crs=self.crs)
        assert isinstance(df.geometry, GeoSeries)
        assert isinstance(df['geometry'], GeoSeries)
        # good if this changed in the future
        assert not isinstance(df['location'], GeoSeries)

    def test_geometry_property(self):
        assert_geoseries_equal(self.df.geometry, self.df['geometry'],
                               check_dtype=True, check_index_type=True)

        df = self.df.copy()
        new_geom = [Point(x, y) for x, y in zip(range(len(self.df)),
                                                range(len(self.df)))]
        df.geometry = new_geom

        new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, new_geom)
        assert_geoseries_equal(df['geometry'], new_geom)

        # new crs
        gs = GeoSeries(new_geom, crs="epsg:26018")
        df.geometry = gs
        assert df.crs == "epsg:26018"

    def test_geometry_property_errors(self):
        with pytest.raises(AttributeError):
            df = self.df.copy()
            del df['geometry']
            df.geometry

        # list-like error
        with pytest.raises(ValueError):
            df = self.df2.copy()
            df.geometry = 'value1'

        # list-like error
        with pytest.raises(ValueError):
            df = self.df.copy()
            df.geometry = 'apple'

        # non-geometry error
        with pytest.raises(TypeError):
            df = self.df.copy()
            df.geometry = list(range(df.shape[0]))

        with pytest.raises(KeyError):
            df = self.df.copy()
            del df['geometry']
            df['geometry']

        # ndim error
        with pytest.raises(ValueError):
            df = self.df.copy()
            df.geometry = df

    def test_set_geometry(self):
        geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        original_geom = self.df.geometry

        df2 = self.df.set_geometry(geom)
        assert self.df is not df2
        assert_geoseries_equal(df2.geometry, geom)
        assert_geoseries_equal(self.df.geometry, original_geom)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)
        # unknown column
        with pytest.raises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with pytest.raises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        new_df = self.df.set_geometry(gs)
        assert new_df.crs == "epsg:26018"

        # explicit crs overrides self and dataframe
        new_df = self.df.set_geometry(gs, crs="epsg:27159")
        assert new_df.crs == "epsg:27159"
        assert new_df.geometry.crs == "epsg:27159"

        # Series should use dataframe's
        new_df = self.df.set_geometry(geom.values)
        assert new_df.crs == self.df.crs
        assert new_df.geometry.crs == self.df.crs

    def test_set_geometry_col(self):
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry')

        # Drop is false by default
        assert 'simplified_geometry' in df2
        assert_geoseries_equal(df2.geometry, g_simplified)

        # If True, drops column and renames to geometry
        df3 = self.df.set_geometry('simplified_geometry', drop=True)
        assert 'simplified_geometry' not in df3
        assert_geoseries_equal(df3.geometry, g_simplified)

    def test_set_geometry_inplace(self):
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        assert ret is None
        geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, geom)

    def test_set_geometry_series(self):
        # Test when setting geometry with a Series that
        # alignment will occur
        #
        # Reverse the index order
        # Set the Series to be Point(i,i) where i is the index
        self.df.index = range(len(self.df) - 1, -1, -1)

        g = GeoSeries({i: Point(i, i) for i in range(len(self.df))})
        # At this point, the DataFrame index is [4,3,2,1,0] and the
        # GeoSeries index is [0,1,2,3,4]. Make sure set_geometry aligns
        # them to match indexes
        df = self.df.set_geometry(g)

        for i, r in df.iterrows():
            assert i == r['geometry'].x
            assert i == r['geometry'].y

    def test_to_json(self):
        text = self.df.to_json()
        data = json.loads(text)
        assert data['type'] == 'FeatureCollection'
        assert len(data['features']) == 5

    def test_to_json_geom_col(self):
        df = self.df.copy()
        df['geom'] = df['geometry']
        df['geometry'] = np.arange(len(df))
        df.set_geometry('geom', inplace=True)

        text = df.to_json()
        data = json.loads(text)
        assert data['type'] == 'FeatureCollection'
        assert len(data['features']) == 5

    def test_to_json_na(self):
        # Set a value as nan and make sure it's written
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan

        text = self.df.to_json()
        data = json.loads(text)
        assert len(data['features']) == 5
        for f in data['features']:
            props = f['properties']
            assert len(props) == 4
            if props['BoroName'] == 'Queens':
                assert props['Shape_Area'] is None

    def test_to_json_bad_na(self):
        # Check that a bad na argument raises error
        with pytest.raises(ValueError):
            self.df.to_json(na='garbage')

    def test_to_json_dropna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='drop')
        data = json.loads(text)
        assert len(data['features']) == 5
        for f in data['features']:
            props = f['properties']
            if props['BoroName'] == 'Queens':
                assert len(props) == 3
                assert 'Shape_Area' not in props
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                assert 'Shape_Leng' in props
            elif props['BoroName'] == 'Bronx':
                assert len(props) == 3
                assert 'Shape_Leng' not in props
                assert 'Shape_Area' in props
            else:
                assert len(props) == 4

    def test_to_json_keepna(self):
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='keep')
        data = json.loads(text)
        assert len(data['features']) == 5
        for f in data['features']:
            props = f['properties']
            assert len(props) == 4
            if props['BoroName'] == 'Queens':
                assert np.isnan(props['Shape_Area'])
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                assert 'Shape_Leng' in props
            elif props['BoroName'] == 'Bronx':
                assert np.isnan(props['Shape_Leng'])
                assert 'Shape_Area' in props

    def test_copy(self):
        df2 = self.df.copy()
        assert type(df2) is GeoDataFrame
        assert self.df.crs == df2.crs

    def test_to_file(self):
        """Test to_file and from_file."""
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in
        df = GeoDataFrame.from_file(tempfilename)
        assert 'geometry' in df
        assert len(df) == 5
        # np.all replaces np.alltrue, which newer NumPy no longer ships.
        assert np.all(df['BoroName'].values == self.boros)

        # Write layer with null geometry out to file
        tempfilename = os.path.join(self.tempdir, 'null_geom.shp')
        self.df3.to_file(tempfilename)
        # Read layer back in
        df3 = GeoDataFrame.from_file(tempfilename)
        assert 'geometry' in df3
        assert len(df3) == 2
        assert np.all(df3['Name'].values == self.line_paths)

    def test_to_file_types(self):
        """Test various integer type columns (GH#93)."""
        tempfilename = os.path.join(self.tempdir, 'int.shp')
        # np.int and np.long were aliases of builtins and are not present
        # in every NumPy release: use ``int`` directly and only pick up
        # ``np.long`` where the installed NumPy still provides it.
        int_types = [int, np.int8, np.int16, np.int32, np.int64, np.intp,
                     np.uint8, np.uint16, np.uint32, np.uint64,
                     getattr(np, 'long', int)]
        geometry = self.df2.geometry
        data = dict((str(i), np.arange(len(geometry), dtype=dtype))
                    for i, dtype in enumerate(int_types))
        df = GeoDataFrame(data, geometry=geometry)
        df.to_file(tempfilename)

    def test_mixed_types_to_file(self):
        """Test that mixed geometry types raise error when writing to file."""
        tempfilename = os.path.join(self.tempdir, 'test.shp')
        s = GeoDataFrame({'geometry': [Point(0, 0),
                                       Polygon([(0, 0), (1, 0), (1, 1)])]})
        with pytest.raises(ValueError):
            s.to_file(tempfilename)

    def test_to_file_schema(self):
        """
        Ensure that the file is written according to the schema
        if it is specified
        """
        from collections import OrderedDict

        tempfilename = os.path.join(self.tempdir, 'test.shp')
        properties = OrderedDict([
            ('Shape_Leng', 'float:19.11'),
            ('BoroName', 'str:40'),
            ('BoroCode', 'int:10'),
            ('Shape_Area', 'float:19.11'),
        ])
        schema = {'geometry': 'Polygon', 'properties': properties}

        # Take the first 2 features to speed things up a bit
        self.df.iloc[:2].to_file(tempfilename, schema=schema)

        with fiona.open(tempfilename) as f:
            result_schema = f.schema

        assert result_schema == schema

    def test_bool_index(self):
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        assert len(df) == 2
        boros = df['BoroName'].values
        assert 'Brooklyn' in boros
        assert 'Bronx' in boros
        assert type(df) is GeoDataFrame

    def test_coord_slice_points(self):
        assert self.df2.cx[-2:-1, -2:-1].empty
        assert_frame_equal(self.df2, self.df2.cx[:, :])
        assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, :])
        assert_frame_equal(self.df2.loc[5:], self.df2.cx[:, 5:])
        assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, 5:])

    def test_transform(self):
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        assert all(df2['geometry'].geom_almost_equals(utm['geometry'],
                                                      decimal=2))

    def test_to_crs_geo_column_name(self):
        # Test to_crs() with different geometry column name (GH#339)
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        df2 = df2.rename(columns={'geometry': 'geom'})
        df2.set_geometry('geom', inplace=True)
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        assert lonlat.geometry.name == 'geom'
        assert utm.geometry.name == 'geom'
        assert all(df2.geometry.geom_almost_equals(utm.geometry, decimal=2))

    def test_from_features(self):
        nybb_filename = geopandas.datasets.get_path('nybb')
        with fiona.open(nybb_filename) as f:
            features = list(f)
            crs = f.crs

        df = GeoDataFrame.from_features(features, crs=crs)
        validate_boro_df(df, case_sensitive=True)
        assert df.crs == crs

    def test_from_features_unaligned_properties(self):
        p1 = Point(1, 1)
        f1 = {'type': 'Feature',
              'properties': {'a': 0},
              'geometry': p1.__geo_interface__}

        p2 = Point(2, 2)
        f2 = {'type': 'Feature',
              'properties': {'b': 1},
              'geometry': p2.__geo_interface__}

        p3 = Point(3, 3)
        f3 = {'type': 'Feature',
              'properties': {'a': 2},
              'geometry': p3.__geo_interface__}

        df = GeoDataFrame.from_features([f1, f2, f3])

        result = df[['a', 'b']]
        expected = pd.DataFrame.from_dict([{'a': 0, 'b': np.nan},
                                           {'a': np.nan, 'b': 1},
                                           {'a': 2, 'b': np.nan}])
        assert_frame_equal(expected, result)

    def test_from_feature_collection(self):
        data = {'name': ['a', 'b', 'c'],
                'lat': [45, 46, 47.5],
                'lon': [-120, -121.2, -122.9]}

        df = pd.DataFrame(data)
        geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]
        gdf = GeoDataFrame(df, geometry=geometry)
        # from_features returns sorted columns
        expected = gdf[['geometry', 'lat', 'lon', 'name']]

        # test FeatureCollection
        res = GeoDataFrame.from_features(gdf.__geo_interface__)
        assert_frame_equal(res, expected)

        # test list of Features
        res = GeoDataFrame.from_features(gdf.__geo_interface__['features'])
        assert_frame_equal(res, expected)

        # test __geo_interface__ attribute (a GeoDataFrame has one)
        res = GeoDataFrame.from_features(gdf)
        assert_frame_equal(res, expected)

    def test_from_postgis_default(self):
        df = self._read_postgis("SELECT * FROM nybb;")

        validate_boro_df(df, case_sensitive=False)

    def test_from_postgis_custom_geom_col(self):
        sql = """SELECT
                 borocode, boroname, shape_leng, shape_area,
                 geom AS __geometry__
                 FROM nybb;"""
        df = self._read_postgis(sql, geom_col='__geometry__')

        validate_boro_df(df, case_sensitive=False)

    def test_dataframe_to_geodataframe(self):
        df = pd.DataFrame({"A": range(len(self.df)),
                           "location": list(self.df.geometry)},
                          index=self.df.index)
        gf = df.set_geometry('location', crs=self.df.crs)
        assert isinstance(df, pd.DataFrame)
        assert isinstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        assert gf.geometry.name == 'location'
        assert 'geometry' not in gf

        gf2 = df.set_geometry('location', crs=self.df.crs, drop=True)
        assert isinstance(df, pd.DataFrame)
        assert isinstance(gf2, GeoDataFrame)
        assert gf2.geometry.name == 'geometry'
        assert 'geometry' in gf2
        assert 'location' not in gf2
        assert 'location' in df

        # should be a copy
        # (.loc replaces the removed .ix indexer; the lookup is by label)
        df.loc[0, "A"] = 100
        assert gf.loc[0, "A"] == 0
        assert gf2.loc[0, "A"] == 0

        with pytest.raises(ValueError):
            df.set_geometry('location', inplace=True)

    def test_geodataframe_geointerface(self):
        assert self.df.__geo_interface__['type'] == 'FeatureCollection'
        assert len(self.df.__geo_interface__['features']) == self.df.shape[0]

    def test_geodataframe_geojson_no_bbox(self):
        geo = self.df._to_geo(na="null", show_bbox=False)
        assert 'bbox' not in geo.keys()
        for feature in geo['features']:
            assert 'bbox' not in feature.keys()

    def test_geodataframe_geojson_bbox(self):
        geo = self.df._to_geo(na="null", show_bbox=True)
        assert 'bbox' in geo.keys()
        assert len(geo['bbox']) == 4
        assert isinstance(geo['bbox'], tuple)
        for feature in geo['features']:
            assert 'bbox' in feature.keys()

    def test_pickle(self):
        filename = os.path.join(self.tempdir, 'df.pkl')
        self.df.to_pickle(filename)
        unpickled = pd.read_pickle(filename)
        assert_frame_equal(self.df, unpickled)
        assert self.df.crs == unpickled.crs
class TestDataFrame(unittest.TestCase):
    """unittest-based checks of basic GeoDataFrame behaviour and I/O."""

    def setUp(self):
        N = 10

        nybb_filename = tests.util.download_nybb()

        self.df = read_file('/nybb_13a/nybb.shp', vfs='zip://' + nybb_filename)
        self.tempdir = tempfile.mkdtemp()
        self.boros = np.array(['Staten Island', 'Queens', 'Brooklyn',
                               'Manhattan', 'Bronx'])
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)

    def tearDown(self):
        shutil.rmtree(self.tempdir)

    def _read_postgis(self, sql, **kwargs):
        # Run ``sql`` through GeoDataFrame.from_postgis, or skip the test
        # when no connection is returned or create_db reports failure.
        # The connection is closed on every path, including the skip
        # raised after create_db fails.
        con = tests.util.connect('test_geopandas')
        if con is None:
            raise unittest.case.SkipTest()
        try:
            if not tests.util.create_db(self.df):
                raise unittest.case.SkipTest()
            return GeoDataFrame.from_postgis(sql, con, **kwargs)
        finally:
            con.close()

    def test_df_init(self):
        self.assertIs(type(self.df2), GeoDataFrame)
        self.assertEqual(self.df2.crs, self.crs)

    def test_to_json(self):
        text = self.df.to_json()
        data = json.loads(text)
        self.assertEqual(data['type'], 'FeatureCollection')
        self.assertEqual(len(data['features']), 5)

    def test_copy(self):
        df2 = self.df.copy()
        self.assertIs(type(df2), GeoDataFrame)
        self.assertEqual(self.df.crs, df2.crs)

    def test_to_file(self):
        """ Test to_file and from_file """
        tempfilename = os.path.join(self.tempdir, 'boros.shp')
        self.df.to_file(tempfilename)
        # Read layer back in?
        df = GeoDataFrame.from_file(tempfilename)
        self.assertIn('geometry', df)
        self.assertEqual(len(df), 5)
        # np.all replaces np.alltrue, which newer NumPy no longer ships.
        self.assertTrue(np.all(df['BoroName'].values == self.boros))

    def test_mixed_types_to_file(self):
        """ Test that mixed geometry types raise error when writing to file """
        tempfilename = os.path.join(self.tempdir, 'test.shp')
        s = GeoDataFrame({'geometry': [Point(0, 0),
                                       Polygon([(0, 0), (1, 0), (1, 1)])]})
        with self.assertRaises(ValueError):
            s.to_file(tempfilename)

    def test_bool_index(self):
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        self.assertEqual(len(df), 2)
        boros = df['BoroName'].values
        self.assertIn('Brooklyn', boros)
        self.assertIn('Bronx', boros)
        self.assertIs(type(df), GeoDataFrame)

    def test_transform(self):
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        self.assertTrue(all(df2['geometry'].almost_equals(utm['geometry'],
                                                          decimal=2)))

    def test_from_postgis_default(self):
        df = self._read_postgis("SELECT * FROM nybb;")

        tests.util.validate_boro_df(self, df)

    def test_from_postgis_custom_geom_col(self):
        sql = """SELECT
                 borocode, boroname, shape_leng, shape_area,
                 geom AS __geometry__
                 FROM nybb;"""
        df = self._read_postgis(sql, geom_col='__geometry__')

        tests.util.validate_boro_df(self, df)
class TestSpatialJoinNYBB:
    """``sjoin`` checks joining the nybb polygons with a diagonal of points."""

    def setup_method(self):
        self.polydf = read_file(geopandas.datasets.get_path("nybb"))
        self.crs = self.polydf.crs

        # Walk the bounding box of the polygons in N equal integer steps.
        N = 20
        min_x, min_y, max_x, max_y = [int(v) for v in self.polydf.total_bounds]
        xs = range(min_x, max_x, int((max_x - min_x) / N))
        ys = range(min_y, max_y, int((max_y - min_y) / N))

        records = []
        for x, y in zip(xs, ys):
            records.append(
                {"geometry": Point(x, y), "pointattr1": x + y, "pointattr2": x - y}
            )
        self.pointdf = GeoDataFrame(records, crs=self.crs)

    def test_geometry_name(self):
        # test sjoin is working with other geometry name
        name_before = self.polydf.geometry.name
        renamed = self.polydf.rename(columns={"geometry": "new_geom"})
        self.polydf = renamed.set_geometry("new_geom")
        assert name_before != self.polydf.geometry.name

        joined = sjoin(self.polydf, self.pointdf, how="left")
        assert self.polydf.geometry.name == joined.geometry.name

    def test_sjoin_left(self):
        joined = sjoin(self.pointdf, self.polydf, how="left")
        assert joined.shape == (21, 8)
        for _, record in joined.iterrows():
            assert record.geometry.type == "Point"
        for column in ("pointattr1", "BoroCode"):
            assert column in joined.columns

    def test_sjoin_right(self):
        # the inverse of left
        right_join = sjoin(self.pointdf, self.polydf, how="right")
        swapped_left_join = sjoin(self.polydf, self.pointdf, how="left")
        assert right_join.shape == (12, 8)
        assert right_join.shape == swapped_left_join.shape
        for frame in (right_join, swapped_left_join):
            for _, record in frame.iterrows():
                assert record.geometry.type == "MultiPolygon"

    def test_sjoin_inner(self):
        joined = sjoin(self.pointdf, self.polydf, how="inner")
        assert joined.shape == (11, 8)

    def test_sjoin_op(self):
        # points within polygons
        within = sjoin(self.pointdf, self.polydf, how="left", op="within")
        assert within.shape == (21, 8)
        assert within.loc[1]["BoroName"] == "Staten Island"

        # points contain polygons? never happens so we should have nulls
        contains = sjoin(self.pointdf, self.polydf, how="left", op="contains")
        assert contains.shape == (21, 8)
        assert np.isnan(contains.loc[1]["Shape_Area"])

    def test_sjoin_bad_op(self):
        # AttributeError: 'Point' object has no attribute 'spandex'
        with pytest.raises(ValueError):
            sjoin(self.pointdf, self.polydf, how="left", op="spandex")

    def test_sjoin_duplicate_column_name(self):
        clashing_points = self.pointdf.rename(columns={"pointattr1": "Shape_Area"})
        joined = sjoin(clashing_points, self.polydf, how="left")
        for suffixed in ("Shape_Area_left", "Shape_Area_right"):
            assert suffixed in joined.columns

    @pytest.mark.parametrize("how", ["left", "right", "inner"])
    def test_sjoin_named_index(self, how):
        # original index names should be unchanged
        points = self.pointdf.copy()
        points.index.name = "pointid"
        polygons = self.polydf.copy()
        polygons.index.name = "polyid"

        joined = sjoin(points, polygons, how=how)
        assert points.index.name == "pointid"
        assert polygons.index.name == "polyid"

        # original index name should pass through to result
        # ("right" keeps the polygon index; "left" and "inner" the point one)
        passed_through = "polyid" if how == "right" else "pointid"
        assert joined.index.name == passed_through

    def test_sjoin_values(self):
        # GH190
        self.polydf.index = [1, 3, 4, 5, 6]
        points_first = sjoin(self.pointdf, self.polydf, how="left")
        assert points_first.shape == (21, 8)
        polygons_first = sjoin(self.polydf, self.pointdf, how="left")
        assert polygons_first.shape == (12, 8)

    @pytest.mark.xfail
    def test_no_overlapping_geometry(self):
        # Note: these tests are for correctly returning GeoDataFrame
        # when result of the join is empty

        def _empty_int_column(name):
            return pd.Series(name=name, dtype="int64")

        got_inner = sjoin(self.pointdf.iloc[17:], self.polydf, how="inner")
        got_left = sjoin(self.pointdf.iloc[17:], self.polydf, how="left")
        got_right = sjoin(self.pointdf.iloc[17:], self.polydf, how="right")

        inner_parts = [
            self.pointdf.iloc[:0],
            _empty_int_column("index_right"),
            self.polydf.drop("geometry", axis=1).iloc[:0],
        ]
        expected_inner = GeoDataFrame(pd.concat(inner_parts, axis=1), crs="epsg:4326")

        index_columns = pd.concat(
            [_empty_int_column("index_left"), _empty_int_column("index_right")],
            axis=1,
        )
        right_parts = [
            self.pointdf.drop("geometry", axis=1).iloc[:0],
            index_columns,
            self.polydf,
        ]
        expected_right = GeoDataFrame(pd.concat(right_parts, axis=1), crs="epsg:4326")
        expected_right = expected_right.set_index("index_right")

        left_parts = [
            self.pointdf.iloc[17:],
            _empty_int_column("index_right"),
            self.polydf.iloc[:0].drop("geometry", axis=1),
        ]
        expected_left = GeoDataFrame(pd.concat(left_parts, axis=1), crs="epsg:4326")

        assert expected_inner.equals(got_inner)
        assert expected_right.equals(got_right)
        assert expected_left.equals(got_left)

    @pytest.mark.skip("Not implemented")
    def test_sjoin_outer(self):
        joined = sjoin(self.pointdf, self.polydf, how="outer")
        assert joined.shape == (21, 8)

    def test_sjoin_empty_geometries(self):
        # https://github.com/geopandas/geopandas/issues/944
        empties = GeoDataFrame(geometry=[GeometryCollection()] * 3)

        padded_points = self.pointdf.append(empties)
        joined = sjoin(padded_points, self.polydf, how="left")
        assert joined.shape == (24, 8)

        padded_polygons = self.polydf.append(empties)
        joined_again = sjoin(self.pointdf, padded_polygons, how="left")
        assert joined_again.shape == (21, 8)
class TestDataFrame: def setup_method(self): N = 10 nybb_filename = geopandas.datasets.get_path("nybb") self.df = read_file(nybb_filename) self.tempdir = tempfile.mkdtemp() self.crs = "epsg:4326" self.df2 = GeoDataFrame( [{ "geometry": Point(x, y), "value1": x + y, "value2": x * y } for x, y in zip(range(N), range(N))], crs=self.crs, ) self.df3 = read_file( os.path.join(PACKAGE_DIR, "examples", "null_geom.geojson")) def teardown_method(self): shutil.rmtree(self.tempdir) def test_df_init(self): assert type(self.df2) is GeoDataFrame assert self.df2.crs == self.crs def test_different_geo_colname(self): data = { "A": range(5), "B": range(-5, 0), "location": [Point(x, y) for x, y in zip(range(5), range(5))], } df = GeoDataFrame(data, crs=self.crs, geometry="location") locs = GeoSeries(data["location"], crs=self.crs) assert_geoseries_equal(df.geometry, locs) assert "geometry" not in df assert df.geometry.name == "location" # internal implementation detail assert df._geometry_column_name == "location" geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))] with pytest.raises(CRSError): df.set_geometry(geom2, crs="dummy_crs") @pytest.mark.filterwarnings("ignore:Geometry is in a geographic CRS") def test_geo_getitem(self): data = { "A": range(5), "B": range(-5, 0), "location": [Point(x, y) for x, y in zip(range(5), range(5))], } df = GeoDataFrame(data, crs=self.crs, geometry="location") assert isinstance(df.geometry, GeoSeries) df["geometry"] = df["A"] assert isinstance(df.geometry, GeoSeries) assert df.geometry[0] == data["location"][0] # good if this changed in the future assert not isinstance(df["geometry"], GeoSeries) assert isinstance(df["location"], GeoSeries) df["buff"] = df.buffer(1) assert isinstance(df["buff"], GeoSeries) df["array"] = from_shapely( [Point(x, y) for x, y in zip(range(5), range(5))]) assert isinstance(df["array"], GeoSeries) data["geometry"] = [ Point(x + 1, y - 1) for x, y in zip(range(5), range(5)) ] df = GeoDataFrame(data, crs=self.crs) 
assert isinstance(df.geometry, GeoSeries) assert isinstance(df["geometry"], GeoSeries) # good if this changed in the future assert not isinstance(df["location"], GeoSeries) def test_getitem_no_geometry(self): res = self.df2[["value1", "value2"]] assert isinstance(res, pd.DataFrame) assert not isinstance(res, GeoDataFrame) # with different name df = self.df2.copy() df = df.rename(columns={"geometry": "geom"}).set_geometry("geom") assert isinstance(df, GeoDataFrame) res = df[["value1", "value2"]] assert isinstance(res, pd.DataFrame) assert not isinstance(res, GeoDataFrame) df["geometry"] = np.arange(len(df)) res = df[["value1", "value2", "geometry"]] assert isinstance(res, pd.DataFrame) assert not isinstance(res, GeoDataFrame) def test_geo_setitem(self): data = { "A": range(5), "B": np.arange(5.0), "geometry": [Point(x, y) for x, y in zip(range(5), range(5))], } df = GeoDataFrame(data) s = GeoSeries([Point(x, y + 1) for x, y in zip(range(5), range(5))]) # setting geometry column for vals in [s, s.values]: df["geometry"] = vals assert_geoseries_equal(df["geometry"], s) assert_geoseries_equal(df.geometry, s) # non-aligned values s2 = GeoSeries([Point(x, y + 1) for x, y in zip(range(6), range(6))]) df["geometry"] = s2 assert_geoseries_equal(df["geometry"], s) assert_geoseries_equal(df.geometry, s) # setting other column with geometry values -> preserve geometry type for vals in [s, s.values]: df["other_geom"] = vals assert isinstance(df["other_geom"].values, GeometryArray) # overwriting existing non-geometry column -> preserve geometry type data = { "A": range(5), "B": np.arange(5.0), "other_geom": range(5), "geometry": [Point(x, y) for x, y in zip(range(5), range(5))], } df = GeoDataFrame(data) for vals in [s, s.values]: df["other_geom"] = vals assert isinstance(df["other_geom"].values, GeometryArray) def test_geometry_property(self): assert_geoseries_equal( self.df.geometry, self.df["geometry"], check_dtype=True, check_index_type=True, ) df = self.df.copy() new_geom = 
[ Point(x, y) for x, y in zip(range(len(self.df)), range(len(self.df))) ] df.geometry = new_geom new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs) assert_geoseries_equal(df.geometry, new_geom) assert_geoseries_equal(df["geometry"], new_geom) # new crs gs = new_geom.to_crs(crs="epsg:3857") df.geometry = gs assert df.crs == "epsg:3857" def test_geometry_property_errors(self): with pytest.raises(AttributeError): df = self.df.copy() del df["geometry"] df.geometry # list-like error with pytest.raises(ValueError): df = self.df2.copy() df.geometry = "value1" # list-like error with pytest.raises(ValueError): df = self.df.copy() df.geometry = "apple" # non-geometry error with pytest.raises(TypeError): df = self.df.copy() df.geometry = list(range(df.shape[0])) with pytest.raises(KeyError): df = self.df.copy() del df["geometry"] df["geometry"] # ndim error with pytest.raises(ValueError): df = self.df.copy() df.geometry = df def test_rename_geometry(self): assert self.df.geometry.name == "geometry" df2 = self.df.rename_geometry("new_name") assert df2.geometry.name == "new_name" df2 = self.df.rename_geometry("new_name", inplace=True) assert df2 is None assert self.df.geometry.name == "new_name" # existing column error msg = "Column named Shape_Area already exists" with pytest.raises(ValueError, match=msg): df2 = self.df.rename_geometry("Shape_Area") with pytest.raises(ValueError, match=msg): self.df.rename_geometry("Shape_Area", inplace=True) def test_set_geometry(self): geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))]) original_geom = self.df.geometry df2 = self.df.set_geometry(geom) assert self.df is not df2 assert_geoseries_equal(df2.geometry, geom, check_crs=False) assert_geoseries_equal(self.df.geometry, original_geom) assert_geoseries_equal(self.df["geometry"], self.df.geometry) # unknown column with pytest.raises(ValueError): self.df.set_geometry("nonexistent-column") # ndim error with pytest.raises(ValueError): self.df.set_geometry(self.df) # 
new crs - setting should default to GeoSeries' crs gs = GeoSeries(geom, crs="epsg:3857") new_df = self.df.set_geometry(gs) assert new_df.crs == "epsg:3857" # explicit crs overrides self and dataframe new_df = self.df.set_geometry(gs, crs="epsg:26909") assert new_df.crs == "epsg:26909" assert new_df.geometry.crs == "epsg:26909" # Series should use dataframe's new_df = self.df.set_geometry(geom.values) assert new_df.crs == self.df.crs assert new_df.geometry.crs == self.df.crs def test_set_geometry_col(self): g = self.df.geometry g_simplified = g.simplify(100) self.df["simplified_geometry"] = g_simplified df2 = self.df.set_geometry("simplified_geometry") # Drop is false by default assert "simplified_geometry" in df2 assert_geoseries_equal(df2.geometry, g_simplified) # If True, drops column and renames to geometry df3 = self.df.set_geometry("simplified_geometry", drop=True) assert "simplified_geometry" not in df3 assert_geoseries_equal(df3.geometry, g_simplified) def test_set_geometry_inplace(self): geom = [Point(x, y) for x, y in zip(range(5), range(5))] ret = self.df.set_geometry(geom, inplace=True) assert ret is None geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs) assert_geoseries_equal(self.df.geometry, geom) def test_set_geometry_series(self): # Test when setting geometry with a Series that # alignment will occur # # Reverse the index order # Set the Series to be Point(i,i) where i is the index self.df.index = range(len(self.df) - 1, -1, -1) d = {} for i in range(len(self.df)): d[i] = Point(i, i) g = GeoSeries(d) # At this point, the DataFrame index is [4,3,2,1,0] and the # GeoSeries index is [0,1,2,3,4]. 
Make sure set_geometry aligns # them to match indexes df = self.df.set_geometry(g) for i, r in df.iterrows(): assert i == r["geometry"].x assert i == r["geometry"].y def test_set_geometry_empty(self): df = pd.DataFrame(columns=["a", "geometry"], index=pd.DatetimeIndex([])) result = df.set_geometry("geometry") assert isinstance(result, GeoDataFrame) assert isinstance(result.index, pd.DatetimeIndex) def test_align(self): df = self.df2 res1, res2 = df.align(df) assert_geodataframe_equal(res1, df) assert_geodataframe_equal(res2, df) res1, res2 = df.align(df.copy()) assert_geodataframe_equal(res1, df) assert_geodataframe_equal(res2, df) # assert crs is / is not preserved on mixed dataframes df_nocrs = df.copy() df_nocrs.crs = None res1, res2 = df.align(df_nocrs) assert_geodataframe_equal(res1, df) assert res1.crs is not None assert_geodataframe_equal(res2, df_nocrs) assert res2.crs is None # mixed GeoDataFrame / DataFrame df_nogeom = pd.DataFrame(df.drop("geometry", axis=1)) res1, res2 = df.align(df_nogeom, axis=0) assert_geodataframe_equal(res1, df) assert type(res2) == pd.DataFrame assert_frame_equal(res2, df_nogeom) # same as above but now with actual alignment df1 = df.iloc[1:].copy() df2 = df.iloc[:-1].copy() exp1 = df.copy() exp1.iloc[0] = np.nan exp2 = df.copy() exp2.iloc[-1] = np.nan res1, res2 = df1.align(df2) assert_geodataframe_equal(res1, exp1) assert_geodataframe_equal(res2, exp2) df2_nocrs = df2.copy() df2_nocrs.crs = None exp2_nocrs = exp2.copy() exp2_nocrs.crs = None res1, res2 = df1.align(df2_nocrs) assert_geodataframe_equal(res1, exp1) assert res1.crs is not None assert_geodataframe_equal(res2, exp2_nocrs) assert res2.crs is None df2_nogeom = pd.DataFrame(df2.drop("geometry", axis=1)) exp2_nogeom = pd.DataFrame(exp2.drop("geometry", axis=1)) res1, res2 = df1.align(df2_nogeom, axis=0) assert_geodataframe_equal(res1, exp1) assert type(res2) == pd.DataFrame assert_frame_equal(res2, exp2_nogeom) def test_to_json(self): text = self.df.to_json() data = 
json.loads(text) assert data["type"] == "FeatureCollection" assert len(data["features"]) == 5 def test_to_json_geom_col(self): df = self.df.copy() df["geom"] = df["geometry"] df["geometry"] = np.arange(len(df)) df.set_geometry("geom", inplace=True) text = df.to_json() data = json.loads(text) assert data["type"] == "FeatureCollection" assert len(data["features"]) == 5 def test_to_json_na(self): # Set a value as nan and make sure it's written self.df.loc[self.df["BoroName"] == "Queens", "Shape_Area"] = np.nan text = self.df.to_json() data = json.loads(text) assert len(data["features"]) == 5 for f in data["features"]: props = f["properties"] assert len(props) == 4 if props["BoroName"] == "Queens": assert props["Shape_Area"] is None def test_to_json_bad_na(self): # Check that a bad na argument raises error with pytest.raises(ValueError): self.df.to_json(na="garbage") def test_to_json_dropna(self): self.df.loc[self.df["BoroName"] == "Queens", "Shape_Area"] = np.nan self.df.loc[self.df["BoroName"] == "Bronx", "Shape_Leng"] = np.nan text = self.df.to_json(na="drop") data = json.loads(text) assert len(data["features"]) == 5 for f in data["features"]: props = f["properties"] if props["BoroName"] == "Queens": assert len(props) == 3 assert "Shape_Area" not in props # Just make sure setting it to nan in a different row # doesn't affect this one assert "Shape_Leng" in props elif props["BoroName"] == "Bronx": assert len(props) == 3 assert "Shape_Leng" not in props assert "Shape_Area" in props else: assert len(props) == 4 def test_to_json_keepna(self): self.df.loc[self.df["BoroName"] == "Queens", "Shape_Area"] = np.nan self.df.loc[self.df["BoroName"] == "Bronx", "Shape_Leng"] = np.nan text = self.df.to_json(na="keep") data = json.loads(text) assert len(data["features"]) == 5 for f in data["features"]: props = f["properties"] assert len(props) == 4 if props["BoroName"] == "Queens": assert np.isnan(props["Shape_Area"]) # Just make sure setting it to nan in a different row # doesn't 
affect this one assert "Shape_Leng" in props elif props["BoroName"] == "Bronx": assert np.isnan(props["Shape_Leng"]) assert "Shape_Area" in props def test_copy(self): df2 = self.df.copy() assert type(df2) is GeoDataFrame assert self.df.crs == df2.crs def test_to_file_crs(self): """ Ensure that the file is written according to the crs if it is specified """ tempfilename = os.path.join(self.tempdir, "crs.shp") # save correct CRS self.df.to_file(tempfilename) df = GeoDataFrame.from_file(tempfilename) assert df.crs == self.df.crs # overwrite CRS self.df.to_file(tempfilename, crs=3857) df = GeoDataFrame.from_file(tempfilename) assert df.crs == "epsg:3857" # specify CRS for gdf without one df2 = self.df.copy() df2.crs = None df2.to_file(tempfilename, crs=2263) df = GeoDataFrame.from_file(tempfilename) assert df.crs == "epsg:2263" def test_bool_index(self): # Find boros with 'B' in their name df = self.df[self.df["BoroName"].str.contains("B")] assert len(df) == 2 boros = df["BoroName"].values assert "Brooklyn" in boros assert "Bronx" in boros assert type(df) is GeoDataFrame def test_coord_slice_points(self): assert self.df2.cx[-2:-1, -2:-1].empty assert_frame_equal(self.df2, self.df2.cx[:, :]) assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, :]) assert_frame_equal(self.df2.loc[5:], self.df2.cx[:, 5:]) assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, 5:]) def test_from_dict(self): data = {"A": [1], "geometry": [Point(0.0, 0.0)]} df = GeoDataFrame.from_dict(data, crs=3857) assert df.crs == "epsg:3857" assert df._geometry_column_name == "geometry" data = {"B": [1], "location": [Point(0.0, 0.0)]} df = GeoDataFrame.from_dict(data, geometry="location") assert df._geometry_column_name == "location" def test_from_features(self): fiona = pytest.importorskip("fiona") nybb_filename = geopandas.datasets.get_path("nybb") with fiona.open(nybb_filename) as f: features = list(f) crs = f.crs_wkt df = GeoDataFrame.from_features(features, crs=crs) validate_boro_df(df, 
case_sensitive=True) assert df.crs == crs def test_from_features_unaligned_properties(self): p1 = Point(1, 1) f1 = { "type": "Feature", "properties": { "a": 0 }, "geometry": p1.__geo_interface__, } p2 = Point(2, 2) f2 = { "type": "Feature", "properties": { "b": 1 }, "geometry": p2.__geo_interface__, } p3 = Point(3, 3) f3 = { "type": "Feature", "properties": { "a": 2 }, "geometry": p3.__geo_interface__, } df = GeoDataFrame.from_features([f1, f2, f3]) result = df[["a", "b"]] expected = pd.DataFrame.from_dict([{ "a": 0, "b": np.nan }, { "a": np.nan, "b": 1 }, { "a": 2, "b": np.nan }]) assert_frame_equal(expected, result) def test_from_features_geom_interface_feature(self): class Placemark(object): def __init__(self, geom, val): self.__geo_interface__ = { "type": "Feature", "properties": { "a": val }, "geometry": geom.__geo_interface__, } p1 = Point(1, 1) f1 = Placemark(p1, 0) p2 = Point(3, 3) f2 = Placemark(p2, 0) df = GeoDataFrame.from_features([f1, f2]) assert sorted(df.columns) == ["a", "geometry"] assert df.geometry.tolist() == [p1, p2] def test_from_feature_collection(self): data = { "name": ["a", "b", "c"], "lat": [45, 46, 47.5], "lon": [-120, -121.2, -122.9], } df = pd.DataFrame(data) geometry = [Point(xy) for xy in zip(df["lon"], df["lat"])] gdf = GeoDataFrame(df, geometry=geometry) # from_features returns sorted columns expected = gdf[["geometry", "lat", "lon", "name"]] # test FeatureCollection res = GeoDataFrame.from_features(gdf.__geo_interface__) assert_frame_equal(res, expected) # test list of Features res = GeoDataFrame.from_features(gdf.__geo_interface__["features"]) assert_frame_equal(res, expected) # test __geo_interface__ attribute (a GeoDataFrame has one) res = GeoDataFrame.from_features(gdf) assert_frame_equal(res, expected) def test_dataframe_to_geodataframe(self): df = pd.DataFrame( { "A": range(len(self.df)), "location": np.array(self.df.geometry) }, index=self.df.index, ) gf = df.set_geometry("location", crs=self.df.crs) assert isinstance(df, 
pd.DataFrame) assert isinstance(gf, GeoDataFrame) assert_geoseries_equal(gf.geometry, self.df.geometry) assert gf.geometry.name == "location" assert "geometry" not in gf gf2 = df.set_geometry("location", crs=self.df.crs, drop=True) assert isinstance(df, pd.DataFrame) assert isinstance(gf2, GeoDataFrame) assert gf2.geometry.name == "geometry" assert "geometry" in gf2 assert "location" not in gf2 assert "location" in df # should be a copy df.loc[0, "A"] = 100 assert gf.loc[0, "A"] == 0 assert gf2.loc[0, "A"] == 0 with pytest.raises(ValueError): df.set_geometry("location", inplace=True) def test_geodataframe_geointerface(self): assert self.df.__geo_interface__["type"] == "FeatureCollection" assert len(self.df.__geo_interface__["features"]) == self.df.shape[0] def test_geodataframe_iterfeatures(self): df = self.df.iloc[:1].copy() df.loc[0, "BoroName"] = np.nan # when containing missing values # null: ouput the missing entries as JSON null result = list(df.iterfeatures(na="null"))[0]["properties"] assert result["BoroName"] is None # drop: remove the property from the feature. 
result = list(df.iterfeatures(na="drop"))[0]["properties"] assert "BoroName" not in result.keys() # keep: output the missing entries as NaN result = list(df.iterfeatures(na="keep"))[0]["properties"] assert np.isnan(result["BoroName"]) # test for checking that the (non-null) features are python scalars and # not numpy scalars assert type(df.loc[0, "Shape_Leng"]) is np.float64 # null result = list(df.iterfeatures(na="null"))[0] assert type(result["properties"]["Shape_Leng"]) is float # drop result = list(df.iterfeatures(na="drop"))[0] assert type(result["properties"]["Shape_Leng"]) is float # keep result = list(df.iterfeatures(na="keep"))[0] assert type(result["properties"]["Shape_Leng"]) is float # when only having numerical columns df_only_numerical_cols = df[["Shape_Leng", "Shape_Area", "geometry"]] assert type(df_only_numerical_cols.loc[0, "Shape_Leng"]) is np.float64 # null result = list(df_only_numerical_cols.iterfeatures(na="null"))[0] assert type(result["properties"]["Shape_Leng"]) is float # drop result = list(df_only_numerical_cols.iterfeatures(na="drop"))[0] assert type(result["properties"]["Shape_Leng"]) is float # keep result = list(df_only_numerical_cols.iterfeatures(na="keep"))[0] assert type(result["properties"]["Shape_Leng"]) is float # geometry not set df = GeoDataFrame({ "values": [0, 1], "geom": [Point(0, 1), Point(1, 0)] }) with pytest.raises(AttributeError): list(df.iterfeatures()) def test_geodataframe_geojson_no_bbox(self): geo = self.df._to_geo(na="null", show_bbox=False) assert "bbox" not in geo.keys() for feature in geo["features"]: assert "bbox" not in feature.keys() def test_geodataframe_geojson_bbox(self): geo = self.df._to_geo(na="null", show_bbox=True) assert "bbox" in geo.keys() assert len(geo["bbox"]) == 4 assert isinstance(geo["bbox"], tuple) for feature in geo["features"]: assert "bbox" in feature.keys() def test_pickle(self): import pickle df2 = pickle.loads(pickle.dumps(self.df)) assert_geodataframe_equal(self.df, df2) def 
test_pickle_method(self): filename = os.path.join(self.tempdir, "df.pkl") self.df.to_pickle(filename) unpickled = pd.read_pickle(filename) assert_frame_equal(self.df, unpickled) assert self.df.crs == unpickled.crs def test_estimate_utm_crs(self): if PYPROJ_LT_3: with pytest.raises(RuntimeError, match=r"pyproj 3\+ required"): self.df.estimate_utm_crs() else: assert self.df.estimate_utm_crs() == CRS("EPSG:32618") assert self.df.estimate_utm_crs("NAD83") == CRS("EPSG:26918")
class TestDataFrame:
    """Tests for GeoDataFrame construction, geometry handling and I/O.

    Each test runs against fixtures built in ``setup_method``:

    * ``self.df``  - the 'nybb' sample dataset read with ``read_file``.
    * ``self.df2`` - ten points ``Point(i, i)`` with two value columns.
    * ``self.df3`` - the ``null_geom.geojson`` example file.
    * ``self.tempdir`` - scratch directory removed in ``teardown_method``.
    """

    def setup_method(self):
        """Build the fixture frames and a temporary directory."""
        N = 10

        nybb_filename = geopandas.datasets.get_path('nybb')
        self.df = read_file(nybb_filename)
        self.tempdir = tempfile.mkdtemp()
        self.crs = {'init': 'epsg:4326'}
        self.df2 = GeoDataFrame([
            {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
            for x, y in zip(range(N), range(N))], crs=self.crs)
        self.df3 = read_file(
            os.path.join(PACKAGE_DIR, 'examples', 'null_geom.geojson'))

    def teardown_method(self):
        """Remove the temporary directory created in ``setup_method``."""
        shutil.rmtree(self.tempdir)

    def test_df_init(self):
        """The list-of-dicts constructor yields a GeoDataFrame with crs."""
        assert type(self.df2) is GeoDataFrame
        assert self.df2.crs == self.crs

    def test_different_geo_colname(self):
        """A geometry column may have a name other than 'geometry'."""
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y)
                             for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        locs = GeoSeries(data['location'], crs=self.crs)
        assert_geoseries_equal(df.geometry, locs)
        assert 'geometry' not in df
        assert df.geometry.name == 'location'
        # internal implementation detail
        assert df._geometry_column_name == 'location'

        geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))]
        df2 = df.set_geometry(geom2, crs='dummy_crs')
        assert 'location' in df2
        assert df2.crs == 'dummy_crs'
        assert df2.geometry.crs == 'dummy_crs'
        # reset so it outputs okay
        df2.crs = df.crs
        assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs))

    def test_geo_getitem(self):
        """Only the active geometry column is returned as a GeoSeries."""
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y)
                             for x, y in zip(range(5), range(5))]}
        df = GeoDataFrame(data, crs=self.crs, geometry='location')
        assert isinstance(df.geometry, GeoSeries)
        df['geometry'] = df["A"]
        assert isinstance(df.geometry, GeoSeries)
        assert df.geometry[0] == data['location'][0]
        # good if this changed in the future
        assert not isinstance(df['geometry'], GeoSeries)
        assert isinstance(df['location'], GeoSeries)

        data["geometry"] = [Point(x + 1, y - 1)
                            for x, y in zip(range(5), range(5))]
        df = GeoDataFrame(data, crs=self.crs)
        assert isinstance(df.geometry, GeoSeries)
        assert isinstance(df['geometry'], GeoSeries)
        # good if this changed in the future
        assert not isinstance(df['location'], GeoSeries)

    def test_geometry_property(self):
        """Reading and assigning ``df.geometry`` round-trips, incl. crs."""
        assert_geoseries_equal(self.df.geometry, self.df['geometry'],
                               check_dtype=True, check_index_type=True)

        df = self.df.copy()
        new_geom = [Point(x, y) for x, y in zip(range(len(self.df)),
                                                range(len(self.df)))]
        df.geometry = new_geom

        new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs)
        assert_geoseries_equal(df.geometry, new_geom)
        assert_geoseries_equal(df['geometry'], new_geom)

        # new crs
        gs = GeoSeries(new_geom, crs="epsg:26018")
        df.geometry = gs
        assert df.crs == "epsg:26018"

    def test_geometry_property_errors(self):
        """Invalid uses of ``df.geometry`` raise the expected exceptions.

        Setup is kept outside each ``pytest.raises`` block so that only the
        statement under test can satisfy the expectation.
        """
        df = self.df.copy()
        del df['geometry']
        with pytest.raises(AttributeError):
            df.geometry

        # list-like error
        df = self.df2.copy()
        with pytest.raises(ValueError):
            df.geometry = 'value1'

        # list-like error
        df = self.df.copy()
        with pytest.raises(ValueError):
            df.geometry = 'apple'

        # non-geometry error
        df = self.df.copy()
        with pytest.raises(TypeError):
            df.geometry = list(range(df.shape[0]))

        df = self.df.copy()
        del df['geometry']
        with pytest.raises(KeyError):
            df['geometry']

        # ndim error
        df = self.df.copy()
        with pytest.raises(ValueError):
            df.geometry = df

    def test_set_geometry(self):
        """``set_geometry`` returns a new frame and resolves crs priority."""
        geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))])
        original_geom = self.df.geometry

        df2 = self.df.set_geometry(geom)
        assert self.df is not df2
        assert_geoseries_equal(df2.geometry, geom)
        assert_geoseries_equal(self.df.geometry, original_geom)
        assert_geoseries_equal(self.df['geometry'], self.df.geometry)
        # unknown column
        with pytest.raises(ValueError):
            self.df.set_geometry('nonexistent-column')

        # ndim error
        with pytest.raises(ValueError):
            self.df.set_geometry(self.df)

        # new crs - setting should default to GeoSeries' crs
        gs = GeoSeries(geom, crs="epsg:26018")
        new_df = self.df.set_geometry(gs)
        assert new_df.crs == "epsg:26018"

        # explicit crs overrides self and dataframe
        new_df = self.df.set_geometry(gs, crs="epsg:27159")
        assert new_df.crs == "epsg:27159"
        assert new_df.geometry.crs == "epsg:27159"

        # Series should use dataframe's
        new_df = self.df.set_geometry(geom.values)
        assert new_df.crs == self.df.crs
        assert new_df.geometry.crs == self.df.crs

    def test_set_geometry_col(self):
        """``set_geometry`` accepts a column name, optionally dropping it."""
        g = self.df.geometry
        g_simplified = g.simplify(100)
        self.df['simplified_geometry'] = g_simplified
        df2 = self.df.set_geometry('simplified_geometry')

        # Drop is false by default
        assert 'simplified_geometry' in df2
        assert_geoseries_equal(df2.geometry, g_simplified)

        # If True, drops column and renames to geometry
        df3 = self.df.set_geometry('simplified_geometry', drop=True)
        assert 'simplified_geometry' not in df3
        assert_geoseries_equal(df3.geometry, g_simplified)

    def test_set_geometry_inplace(self):
        """``inplace=True`` mutates the frame and returns None."""
        geom = [Point(x, y) for x, y in zip(range(5), range(5))]
        ret = self.df.set_geometry(geom, inplace=True)
        assert ret is None
        geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs)
        assert_geoseries_equal(self.df.geometry, geom)

    def test_set_geometry_series(self):
        """Setting geometry from a Series aligns on the index."""
        # Test when setting geometry with a Series that
        # alignment will occur
        #
        # Reverse the index order
        # Set the Series to be Point(i,i) where i is the index
        self.df.index = range(len(self.df)-1, -1, -1)

        d = {i: Point(i, i) for i in range(len(self.df))}
        g = GeoSeries(d)
        # At this point, the DataFrame index is [4,3,2,1,0] and the
        # GeoSeries index is [0,1,2,3,4]. Make sure set_geometry aligns
        # them to match indexes
        df = self.df.set_geometry(g)

        for i, r in df.iterrows():
            assert i == r['geometry'].x
            assert i == r['geometry'].y

    def test_align(self):
        """``align`` keeps types and crs for GeoDataFrame/DataFrame mixes."""
        df = self.df2

        res1, res2 = df.align(df)
        assert_geodataframe_equal(res1, df)
        assert_geodataframe_equal(res2, df)

        res1, res2 = df.align(df.copy())
        assert_geodataframe_equal(res1, df)
        assert_geodataframe_equal(res2, df)

        # assert crs is / is not preserved on mixed dataframes
        df_nocrs = df.copy()
        df_nocrs.crs = None
        res1, res2 = df.align(df_nocrs)
        assert_geodataframe_equal(res1, df)
        assert res1.crs is not None
        assert_geodataframe_equal(res2, df_nocrs)
        assert res2.crs is None

        # mixed GeoDataFrame / DataFrame
        df_nogeom = pd.DataFrame(df.drop('geometry', axis=1))
        res1, res2 = df.align(df_nogeom, axis=0)
        assert_geodataframe_equal(res1, df)
        # exact type wanted: a GeoDataFrame subclass must not slip through
        assert type(res2) is pd.DataFrame
        assert_frame_equal(res2, df_nogeom)

        # same as above but now with actual alignment
        df1 = df.iloc[1:].copy()
        df2 = df.iloc[:-1].copy()

        exp1 = df.copy()
        exp1.iloc[0] = np.nan
        exp2 = df.copy()
        exp2.iloc[-1] = np.nan
        res1, res2 = df1.align(df2)
        assert_geodataframe_equal(res1, exp1)
        assert_geodataframe_equal(res2, exp2)

        df2_nocrs = df2.copy()
        df2_nocrs.crs = None
        exp2_nocrs = exp2.copy()
        exp2_nocrs.crs = None
        res1, res2 = df1.align(df2_nocrs)
        assert_geodataframe_equal(res1, exp1)
        assert res1.crs is not None
        assert_geodataframe_equal(res2, exp2_nocrs)
        assert res2.crs is None

        df2_nogeom = pd.DataFrame(df2.drop('geometry', axis=1))
        exp2_nogeom = pd.DataFrame(exp2.drop('geometry', axis=1))
        res1, res2 = df1.align(df2_nogeom, axis=0)
        assert_geodataframe_equal(res1, exp1)
        assert type(res2) is pd.DataFrame
        assert_frame_equal(res2, exp2_nogeom)

    def test_to_json(self):
        """``to_json`` emits a FeatureCollection with one feature per row."""
        text = self.df.to_json()
        data = json.loads(text)
        assert data['type'] == 'FeatureCollection'
        assert len(data['features']) == 5

    def test_to_json_geom_col(self):
        """``to_json`` works when the geometry column is not 'geometry'."""
        df = self.df.copy()
        df['geom'] = df['geometry']
        df['geometry'] = np.arange(len(df))
        df.set_geometry('geom', inplace=True)

        text = df.to_json()
        data = json.loads(text)
        assert data['type'] == 'FeatureCollection'
        assert len(data['features']) == 5

    def test_to_json_na(self):
        """By default a NaN property is written as JSON null."""
        # Set a value as nan and make sure it's written
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan

        text = self.df.to_json()
        data = json.loads(text)
        assert len(data['features']) == 5
        for f in data['features']:
            props = f['properties']
            assert len(props) == 4
            if props['BoroName'] == 'Queens':
                assert props['Shape_Area'] is None

    def test_to_json_bad_na(self):
        """An unknown ``na`` option raises ValueError."""
        # Check that a bad na argument raises error
        with pytest.raises(ValueError):
            self.df.to_json(na='garbage')

    def test_to_json_dropna(self):
        """``na='drop'`` omits NaN properties from the affected rows only."""
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='drop')
        data = json.loads(text)
        assert len(data['features']) == 5
        for f in data['features']:
            props = f['properties']
            if props['BoroName'] == 'Queens':
                assert len(props) == 3
                assert 'Shape_Area' not in props
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                assert 'Shape_Leng' in props
            elif props['BoroName'] == 'Bronx':
                assert len(props) == 3
                assert 'Shape_Leng' not in props
                assert 'Shape_Area' in props
            else:
                assert len(props) == 4

    def test_to_json_keepna(self):
        """``na='keep'`` writes NaN properties as NaN."""
        self.df.loc[self.df['BoroName'] == 'Queens', 'Shape_Area'] = np.nan
        self.df.loc[self.df['BoroName'] == 'Bronx', 'Shape_Leng'] = np.nan

        text = self.df.to_json(na='keep')
        data = json.loads(text)
        assert len(data['features']) == 5
        for f in data['features']:
            props = f['properties']
            assert len(props) == 4
            if props['BoroName'] == 'Queens':
                assert np.isnan(props['Shape_Area'])
                # Just make sure setting it to nan in a different row
                # doesn't affect this one
                assert 'Shape_Leng' in props
            elif props['BoroName'] == 'Bronx':
                assert np.isnan(props['Shape_Leng'])
                assert 'Shape_Area' in props

    def test_copy(self):
        """``copy`` preserves the GeoDataFrame type and the crs."""
        df2 = self.df.copy()
        assert type(df2) is GeoDataFrame
        assert self.df.crs == df2.crs

    def test_bool_index(self):
        """Boolean-mask indexing returns a GeoDataFrame."""
        # Find boros with 'B' in their name
        df = self.df[self.df['BoroName'].str.contains('B')]
        assert len(df) == 2
        boros = df['BoroName'].values
        assert 'Brooklyn' in boros
        assert 'Bronx' in boros
        assert type(df) is GeoDataFrame

    def test_coord_slice_points(self):
        """The ``cx`` coordinate indexer slices point geometries."""
        assert self.df2.cx[-2:-1, -2:-1].empty
        assert_frame_equal(self.df2, self.df2.cx[:, :])
        assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, :])
        assert_frame_equal(self.df2.loc[5:], self.df2.cx[:, 5:])
        assert_frame_equal(self.df2.loc[5:], self.df2.cx[5:, 5:])

    def test_transform(self):
        """A ``to_crs`` round trip returns almost-equal geometries."""
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        assert all(df2['geometry'].geom_almost_equals(utm['geometry'],
                                                      decimal=2))

    def test_transform_inplace(self):
        """``to_crs(inplace=True)`` matches the non-inplace result."""
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        lonlat = df2.to_crs(epsg=4326)
        df2.to_crs(epsg=4326, inplace=True)
        assert all(df2['geometry'].geom_almost_equals(lonlat['geometry'],
                                                      decimal=2))

    def test_to_crs_geo_column_name(self):
        """``to_crs`` keeps a non-default geometry column name."""
        # Test to_crs() with different geometry column name (GH#339)
        df2 = self.df2.copy()
        df2.crs = {'init': 'epsg:26918', 'no_defs': True}
        df2 = df2.rename(columns={'geometry': 'geom'})
        df2.set_geometry('geom', inplace=True)
        lonlat = df2.to_crs(epsg=4326)
        utm = lonlat.to_crs(epsg=26918)
        assert lonlat.geometry.name == 'geom'
        assert utm.geometry.name == 'geom'
        assert all(df2.geometry.geom_almost_equals(utm.geometry, decimal=2))

    def test_from_features(self):
        """``from_features`` builds the boro frame from fiona features."""
        nybb_filename = geopandas.datasets.get_path('nybb')
        with fiona.open(nybb_filename) as f:
            features = list(f)
            crs = f.crs

        df = GeoDataFrame.from_features(features, crs=crs)
        validate_boro_df(df, case_sensitive=True)
        assert df.crs == crs

    def test_from_features_unaligned_properties(self):
        """Features with differing property keys are filled with NaN."""
        p1 = Point(1, 1)
        f1 = {'type': 'Feature',
              'properties': {'a': 0},
              'geometry': p1.__geo_interface__}

        p2 = Point(2, 2)
        f2 = {'type': 'Feature',
              'properties': {'b': 1},
              'geometry': p2.__geo_interface__}

        p3 = Point(3, 3)
        f3 = {'type': 'Feature',
              'properties': {'a': 2},
              'geometry': p3.__geo_interface__}

        df = GeoDataFrame.from_features([f1, f2, f3])

        result = df[['a', 'b']]
        expected = pd.DataFrame.from_dict([{'a': 0, 'b': np.nan},
                                           {'a': np.nan, 'b': 1},
                                           {'a': 2, 'b': np.nan}])
        assert_frame_equal(expected, result)

    def test_from_feature_collection(self):
        """``from_features`` accepts three equivalent input forms."""
        data = {'name': ['a', 'b', 'c'],
                'lat': [45, 46, 47.5],
                'lon': [-120, -121.2, -122.9]}

        df = pd.DataFrame(data)
        geometry = [Point(xy) for xy in zip(df['lon'], df['lat'])]
        gdf = GeoDataFrame(df, geometry=geometry)
        # from_features returns sorted columns
        expected = gdf[['geometry', 'lat', 'lon', 'name']]

        # test FeatureCollection
        res = GeoDataFrame.from_features(gdf.__geo_interface__)
        assert_frame_equal(res, expected)

        # test list of Features
        res = GeoDataFrame.from_features(gdf.__geo_interface__['features'])
        assert_frame_equal(res, expected)

        # test __geo_interface__ attribute (a GeoDataFrame has one)
        res = GeoDataFrame.from_features(gdf)
        assert_frame_equal(res, expected)

    def test_from_postgis_default(self):
        """``from_postgis`` reads the nybb table with default settings.

        Skipped when no connection is available or the table cannot be
        created.
        """
        con = connect('test_geopandas')
        if con is None:
            pytest.skip("PostGIS test database is not available")

        try:
            # The skip is raised inside the try block so that the open
            # connection is closed even when the test is skipped.
            if not create_postgis(self.df):
                pytest.skip("PostGIS test database is not available")
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con)
        finally:
            con.close()

        validate_boro_df(df, case_sensitive=False)

    def test_from_postgis_custom_geom_col(self):
        """``from_postgis`` honours a custom ``geom_col`` name.

        Skipped when no connection is available or the table cannot be
        created.
        """
        con = connect('test_geopandas')
        geom_col = "the_geom"
        if con is None:
            pytest.skip("PostGIS test database is not available")

        try:
            # The skip is raised inside the try block so that the open
            # connection is closed even when the test is skipped.
            if not create_postgis(self.df, geom_col=geom_col):
                pytest.skip("PostGIS test database is not available")
            sql = "SELECT * FROM nybb;"
            df = GeoDataFrame.from_postgis(sql, con, geom_col=geom_col)
        finally:
            con.close()

        validate_boro_df(df, case_sensitive=False)

    def test_dataframe_to_geodataframe(self):
        """``DataFrame.set_geometry`` returns a GeoDataFrame copy."""
        df = pd.DataFrame({"A": range(len(self.df)),
                           "location": list(self.df.geometry)},
                          index=self.df.index)
        gf = df.set_geometry('location', crs=self.df.crs)
        assert isinstance(df, pd.DataFrame)
        assert isinstance(gf, GeoDataFrame)
        assert_geoseries_equal(gf.geometry, self.df.geometry)
        assert gf.geometry.name == 'location'
        assert 'geometry' not in gf

        gf2 = df.set_geometry('location', crs=self.df.crs, drop=True)
        assert isinstance(df, pd.DataFrame)
        assert isinstance(gf2, GeoDataFrame)
        assert gf2.geometry.name == 'geometry'
        assert 'geometry' in gf2
        assert 'location' not in gf2
        assert 'location' in df

        # should be a copy
        df.loc[0, "A"] = 100
        assert gf.loc[0, "A"] == 0
        assert gf2.loc[0, "A"] == 0

        with pytest.raises(ValueError):
            df.set_geometry('location', inplace=True)

    def test_geodataframe_geointerface(self):
        """``__geo_interface__`` is a FeatureCollection of all rows."""
        assert self.df.__geo_interface__['type'] == 'FeatureCollection'
        assert len(self.df.__geo_interface__['features']) == self.df.shape[0]

    def test_geodataframe_iterfeatures(self):
        """``iterfeatures`` handles missing values and yields Python floats."""
        df = self.df.iloc[:1].copy()
        df.loc[0, 'BoroName'] = np.nan
        # when containing missing values
        # null: output the missing entries as JSON null
        result = list(df.iterfeatures(na='null'))[0]['properties']
        assert result['BoroName'] is None
        # drop: remove the property from the feature.
        result = list(df.iterfeatures(na='drop'))[0]['properties']
        assert 'BoroName' not in result.keys()
        # keep: output the missing entries as NaN
        result = list(df.iterfeatures(na='keep'))[0]['properties']
        assert np.isnan(result['BoroName'])

        # test for checking that the (non-null) features are python scalars
        # and not numpy scalars
        assert type(df.loc[0, 'Shape_Leng']) is np.float64
        for na in ('null', 'drop', 'keep'):
            result = list(df.iterfeatures(na=na))[0]
            assert type(result['properties']['Shape_Leng']) is float

        # when only having numerical columns
        df_only_numerical_cols = df[['Shape_Leng', 'Shape_Area', 'geometry']]
        assert type(df_only_numerical_cols.loc[0, 'Shape_Leng']) is np.float64
        for na in ('null', 'drop', 'keep'):
            result = list(df_only_numerical_cols.iterfeatures(na=na))[0]
            assert type(result['properties']['Shape_Leng']) is float

    def test_geodataframe_geojson_no_bbox(self):
        """``show_bbox=False`` leaves 'bbox' out of every level."""
        geo = self.df._to_geo(na="null", show_bbox=False)
        assert 'bbox' not in geo.keys()
        for feature in geo['features']:
            assert 'bbox' not in feature.keys()

    def test_geodataframe_geojson_bbox(self):
        """``show_bbox=True`` adds a 4-tuple 'bbox' and per-feature bboxes."""
        geo = self.df._to_geo(na="null", show_bbox=True)
        assert 'bbox' in geo.keys()
        assert len(geo['bbox']) == 4
        assert isinstance(geo['bbox'], tuple)
        for feature in geo['features']:
            assert 'bbox' in feature.keys()

    def test_pickle(self):
        """A ``to_pickle`` / ``pd.read_pickle`` round trip keeps data and crs."""
        filename = os.path.join(self.tempdir, 'df.pkl')
        self.df.to_pickle(filename)
        unpickled = pd.read_pickle(filename)
        assert_frame_equal(self.df, unpickled)
        assert self.df.crs == unpickled.crs

    def test_points_from_xy(self):
        """``points_from_xy`` builds 2D/3D points and validates its inputs."""
        # using GeoDataFrame column
        df = GeoDataFrame([{'x': x, 'y': x, 'z': x} for x in range(10)])
        gs = [Point(x, x) for x in range(10)]
        gsz = [Point(x, x, x) for x in range(10)]
        geometry1 = points_from_xy(df['x'], df['y'])
        geometry2 = points_from_xy(df['x'], df['y'], df['z'])
        assert geometry1 == gs
        assert geometry2 == gsz

        # using GeoSeries or numpy arrays or lists
        for s in [GeoSeries(range(10)), np.arange(10), list(range(10))]:
            geometry1 = points_from_xy(s, s)
            geometry2 = points_from_xy(s, s, s)
            assert geometry1 == gs
            assert geometry2 == gsz

        # Each failing call gets its own ``pytest.raises`` block: the
        # context exits at the first exception, so any further statement
        # in the same block would never be executed.

        # using different lengths should throw error
        arr_10 = np.arange(10)
        arr_20 = np.arange(20)
        with pytest.raises(ValueError):
            points_from_xy(x=arr_10, y=arr_20)
        with pytest.raises(ValueError):
            points_from_xy(x=arr_10, y=arr_10, z=arr_20)

        # Using incomplete arguments should throw error
        coords = list(range(10))
        with pytest.raises(TypeError):
            points_from_xy(x=coords)
        with pytest.raises(TypeError):
            points_from_xy(y=coords)
        with pytest.raises(TypeError):
            points_from_xy(z=coords)