def test_raise_nonpoly(dfs): polydf, _ = dfs pointdf = polydf.copy() pointdf['geometry'] = pointdf.geometry.centroid with pytest.raises(TypeError): overlay(pointdf, polydf, how="union")
def test_overlay(dfs, how, use_sindex, expected_features): """ Basic overlay test with small dummy example dataframes (from docs). Results obtained using QGIS 2.16 (Vector -> Geoprocessing Tools -> Intersection / Union / ...), saved to GeoJSON and pasted here """ df1, df2 = dfs result = overlay(df1, df2, how=how, use_sindex=use_sindex) # construction of result if how == 'identity': expected = pd.concat([ GeoDataFrame.from_features(expected_features['intersection']), GeoDataFrame.from_features(expected_features['difference']) ], ignore_index=True) else: expected = GeoDataFrame.from_features(expected_features[how]) # TODO needed adaptations to result # if how == 'union': # result = result.drop(['idx1', 'idx2'], axis=1).sort_values(['col1', 'col2']).reset_index(drop=True) # elif how in ('intersection', 'identity'): # result = result.drop(['idx1', 'idx2'], axis=1) assert_geodataframe_equal(result, expected) # for difference also reversed if how == 'difference': result = overlay(df2, df1, how=how, use_sindex=use_sindex) expected = GeoDataFrame.from_features( expected_features['difference_inverse']) assert_geodataframe_equal(result, expected)
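The `dfs` fixture itself is not shown in this section. A minimal sketch of what it presumably provides, following the small dummy example from the geopandas docs (two partially overlapping square layers); the exact fixture in the source test suite may differ:

import pytest
from shapely.geometry import Polygon
from geopandas import GeoDataFrame, GeoSeries

@pytest.fixture
def dfs():
    # Assumed fixture contents: two layers of 2x2 squares that partially
    # overlap, as in the geopandas documentation example.
    polys1 = GeoSeries([Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
                        Polygon([(2, 2), (4, 2), (4, 4), (2, 4)])])
    polys2 = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                        Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
    df1 = GeoDataFrame({'geometry': polys1, 'col1': [1, 2]})
    df2 = GeoDataFrame({'geometry': polys2, 'col2': [1, 2]})
    return df1, df2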
def test_preserve_crs(dfs, how): df1, df2 = dfs result = overlay(df1, df2, how=how) assert result.crs is None crs = {'init': 'epsg:4326'} df1.crs = crs df2.crs = crs result = overlay(df1, df2, how=how) assert result.crs == crs
def test_union_no_index(self):
    # explicitly ignore indices
    dfB = overlay(self.polydf, self.polydf2, how="union", use_sindex=False)
    self.assertEqual(dfB.shape, self.union_shape)

    # remove indices from dfs
    self.polydf._sindex = None
    self.polydf2._sindex = None
    dfC = overlay(self.polydf, self.polydf2, how="union")
    self.assertEqual(dfC.shape, self.union_shape)
def test_geometry_not_named_geometry(self): # Issue #306 # Add points and flip names polydf3 = self.polydf.copy() polydf3 = polydf3.rename(columns={'geometry':'polygons'}) polydf3 = polydf3.set_geometry('polygons') polydf3['geometry'] = self.pointdf.geometry.loc[0:4] self.assertTrue(polydf3.geometry.name == 'polygons') df = overlay(polydf3, self.polydf2, how="union") self.assertTrue(type(df) is GeoDataFrame) df2 = overlay(self.polydf, self.polydf2, how="union") self.assertTrue(df.geom_almost_equals(df2).all())
def test_empty_intersection(dfs): df1, df2 = dfs polys3 = GeoSeries([Polygon([(-1, -1), (-3, -1), (-3, -3), (-1, -3)]), Polygon([(-3, -3), (-5, -3), (-5, -5), (-3, -5)])]) df3 = GeoDataFrame({'geometry': polys3, 'col3': [1, 2]}) expected = GeoDataFrame([], columns=['col1', 'col3', 'geometry']) result = overlay(df1, df3) assert_geodataframe_equal(result, expected, check_like=True)
def test_overlay_nybb(how):
    polydf = read_file(geopandas.datasets.get_path('nybb'))

    # construct circles dataframe
    N = 10
    b = [int(x) for x in polydf.total_bounds]
    polydf2 = GeoDataFrame(
        [{'geometry': Point(x, y).buffer(10000), 'value1': x + y,
          'value2': x - y}
         for x, y in zip(range(b[0], b[2], int((b[2] - b[0]) / N)),
                         range(b[1], b[3], int((b[3] - b[1]) / N)))],
        crs=polydf.crs)

    result = overlay(polydf, polydf2, how=how)

    cols = ['BoroCode', 'BoroName', 'Shape_Leng', 'Shape_Area',
            'value1', 'value2']
    if how == 'difference':
        cols = cols[:-2]

    # expected result
    if how == 'identity':
        # read the union result; further below we take the appropriate subset
        expected = read_file(os.path.join(
            DATA, 'nybb_qgis', 'qgis-union.shp'))
    else:
        expected = read_file(os.path.join(
            DATA, 'nybb_qgis', 'qgis-{0}.shp'.format(how)))

    # The result of QGIS for 'union' contains incorrect geometries:
    # 24 is a full original circle overlapping with unioned geometries, and
    # 27 is a completely duplicated row
    if how == 'union':
        expected = expected.drop([24, 27])
        expected.reset_index(inplace=True, drop=True)
    # Eliminate observations without geometries (issue from QGIS)
    expected = expected[expected.is_valid]
    expected.reset_index(inplace=True, drop=True)

    if how == 'identity':
        expected = expected[expected.BoroCode.notnull()].copy()

    # Order GeoDataFrames
    expected = expected.sort_values(cols).reset_index(drop=True)

    # TODO needed adaptations to result
    result = result.sort_values(cols).reset_index(drop=True)

    if how in ('union', 'identity'):
        # concat < 0.23 sorts, so changes the order of the columns
        # but at least we ensure 'geometry' is the last column
        assert result.columns[-1] == 'geometry'
        assert len(result.columns) == len(expected.columns)
        result = result.reindex(columns=expected.columns)

    assert_geodataframe_equal(result, expected, check_crs=False,
                              check_column_type=False)
def test_geometry_not_named_geometry(dfs, how, other_geometry): # Issue #306 # Add points and flip names df1, df2 = dfs df3 = df1.copy() df3 = df3.rename(columns={'geometry': 'polygons'}) df3 = df3.set_geometry('polygons') if other_geometry: df3['geometry'] = df1.centroid.geometry assert df3.geometry.name == 'polygons' res1 = overlay(df1, df2, how=how) res2 = overlay(df3, df2, how=how) assert df3.geometry.name == 'polygons' if how == 'difference': # in case of 'difference', column names of left frame are preserved assert res2.geometry.name == 'polygons' if other_geometry: assert 'geometry' in res2.columns assert_geoseries_equal(res2['geometry'], df3['geometry'], check_series_type=False) res2 = res2.drop(['geometry'], axis=1) res2 = res2.rename(columns={'polygons': 'geometry'}) res2 = res2.set_geometry('geometry') # TODO if existing column is overwritten -> geometry not last column if other_geometry and how == 'intersection': res2 = res2.reindex(columns=res1.columns) assert_geodataframe_equal(res1, res2) df4 = df2.copy() df4 = df4.rename(columns={'geometry': 'geom'}) df4 = df4.set_geometry('geom') if other_geometry: df4['geometry'] = df2.centroid.geometry assert df4.geometry.name == 'geom' res1 = overlay(df1, df2, how=how) res2 = overlay(df1, df4, how=how) assert_geodataframe_equal(res1, res2)
def test_overlay(dfs_index, how, use_sindex): """ Basic overlay test with small dummy example dataframes (from docs). Results obtained using QGIS 2.16 (Vector -> Geoprocessing Tools -> Intersection / Union / ...), saved to GeoJSON """ df1, df2 = dfs_index result = overlay(df1, df2, how=how, use_sindex=use_sindex) # construction of result def _read(name): expected = read_file( os.path.join(DATA, 'polys', 'df1_df2-{0}.geojson'.format(name))) expected.crs = None return expected if how == 'identity': expected_intersection = _read('intersection') expected_difference = _read('difference') expected = pd.concat([ expected_intersection, expected_difference ], ignore_index=True, sort=False) expected['col1'] = expected['col1'].astype(float) else: expected = _read(how) # TODO needed adaptations to result if how == 'union': result = result.sort_values(['col1', 'col2']).reset_index(drop=True) elif how == 'difference': result = result.reset_index(drop=True) assert_geodataframe_equal(result, expected, check_column_type=False) # for difference also reversed if how == 'difference': result = overlay(df2, df1, how=how, use_sindex=use_sindex) result = result.reset_index(drop=True) expected = _read('difference-inverse') assert_geodataframe_equal(result, expected, check_column_type=False)
def test_union_non_numeric_index(self): import string letters = list(string.ascii_letters) polydf_alpha = self.polydf.copy() polydf2_alpha = self.polydf2.copy() polydf_alpha.index = letters[:len(polydf_alpha)] polydf2_alpha.index = letters[:len(polydf2_alpha)] df = overlay(polydf_alpha, polydf2_alpha, how="union") assert type(df) is GeoDataFrame assert df.shape == self.union_shape assert 'value1' in df.columns and 'Shape_Area' in df.columns
def test_correct_index(dfs): # GH883 - case where the index was not properly reset df1, df2 = dfs polys3 = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]), Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]), Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])]) df3 = GeoDataFrame({'geometry': polys3, 'col3': [1, 2, 3]}) i1 = Polygon([(1, 1), (1, 3), (3, 3), (3, 1), (1, 1)]) i2 = Polygon([(3, 3), (3, 5), (5, 5), (5, 3), (3, 3)]) expected = GeoDataFrame([[1, 1, i1], [3, 2, i2]], columns=['col3', 'col2', 'geometry']) result = overlay(df3, df2) assert_geodataframe_equal(result, expected)
def raster_gt(box, gt): transform = rio.transform.from_bounds(*box.geometry.values[0].bounds, 768, 768) species_encoding = {1001: 1, 1005: 2} inter = gpd.overlay(gt, box, how='intersection') shapes = ((row.geometry, species_encoding[row.Species]) for _, row in inter.iterrows()) rastered_shape = rio.features.rasterize(shapes=shapes, out_shape=(768, 768), transform=transform) rgb_mask = np.zeros((768, 768, 3)) rgb_mask[:, :, 0] = rastered_shape == 1 rgb_mask[:, :, 1] = rastered_shape == 2 return rgb_mask
def build_mask(data, grid): dI = gpd.overlay(grid.df, data.to_crs(epsg=grid.epsg), how="intersection", keep_geom_type=False) dO = gpd.overlay(grid.df, data.to_crs(epsg=grid.epsg), how="difference", keep_geom_type=False) dataI = np.array(dI["geometry"][0].array_interface()["data"]).reshape( -1, 2) dataO = np.array(dO["geometry"][0].array_interface()["data"]).reshape( -1, 2) dataM = pd.DataFrame(np.vstack((dataI, dataO)), columns=["x", "y"]) dataM["mask"] = np.nan dataM.iloc[:dataI.shape[0], 2] = True dataM.iloc[dataI.shape[0]:, 2] = False dataM = dataM.sort_values(["x", "y"]) mask_array = dataM["mask"].values.astype(int).reshape(grid.nx, grid.ny).T return mask_array
def spatialSwathFilter(region, h5files, subgroup='//All_Data/ATMS-SDR-GEO_All/'): swathGroups = groupSwathFiles(h5files) geoms, sdrnames, geonames = [], [], [] for sg in swathGroups: geoFile, sdrFile = sg geoBase = 'HDF5:"{0}":{1}{2}' lats = gdal.Open(geoBase.format(geoFile, subgroup, 'Latitude')).ReadAsArray() lons = gdal.Open(geoBase.format(geoFile, subgroup, 'Longitude')).ReadAsArray() view = gdal.Open( geoBase.format(geoFile, subgroup, 'SatelliteZenithAngle')).ReadAsArray() yp, xp = np.where(view < 50) minindex, maxindex = xp.min(), xp.max() lons = lons[:, minindex:maxindex] lats = lats[:, minindex:maxindex] wVerts = [(lons[i, 0], lats[i, 0]) for i in range(lats.shape[0]) if (lons[i, 0] > -200) and (lats[i, 0] > -200)][::-1] nVerts = [(lons[0, i], lats[0, i]) for i in range(lats.shape[1]) if (lons[0, i] > -200) and (lats[0, i] > -200)] eVerts = [(lons[i, -1], lats[i, -1]) for i in range(lats.shape[0]) if (lons[i, -1] > -200) and (lats[i, -1] > -200)] sVerts = [(lons[-1, i], lats[-1, i]) for i in range(lats.shape[1]) if (lons[-1, i] > -200) and (lats[-1, i] > -200)] sVerts.append(wVerts[0]) verts = wVerts + nVerts + eVerts + sVerts geoms.append(geometry.Polygon(verts)) sdrnames.append(sdrFile) geonames.append(geoFile) swathGeo = gpd.GeoDataFrame(pd.DataFrame({ 'sdr': sdrnames, 'geo': geonames, 'geometry': geoms }), geometry=geoms) swathGeo.crs = {'init': 'epsg:4326'} intersection = gpd.overlay(region, swathGeo, how='intersection') return list(intersection.sdr), list(intersection.geo)
def clipMask(crs, shpFile, minX, maxX, minY, maxY, **kwargs):
    '''Clip a shapefile to an extent.

    For better performance, a spatial join with intersects is computed first,
    and then the overlapping polygons are clipped with geopandas overlay.
    Note that a projected CRS is assumed for the area calculation.

    :param crs: CRS of the extent polygon
    :param shpFile: path to the mask shapefile
    :param minX:
    :param maxX:
    :param minY:
    :param maxY:
    :return: clipped GeoDataFrame
    '''
    logger.info('Starting to clip polygon minX=%s, maxX=%s, minY=%s, maxY=%s',
                minX, maxX, minY, maxY)
    logger.info('Opening mask file: %s', shpFile)

    # create extent geodataframe
    start = time.time()

    # open mask shapefile
    masksShp = gp.read_file(shpFile)

    logger.info('Creating clip polygon from extent...')
    extent = gp.GeoSeries([
        Polygon([(minX, minY), (minX, maxY), (maxX, maxY), (maxX, minY),
                 (minX, minY)])
    ])
    dfExtent = gp.GeoDataFrame(geometry=extent)
    dfExtent.crs = crs

    logger.info('Intersecting Shapefile with extent...')
    # intersect with extent
    maskIntersect = gp.sjoin(masksShp, dfExtent, how='inner', op='intersects')

    # drop all columns except geometry and Area
    maskIntersect.drop(maskIntersect.columns.difference(['geometry', 'Area']),
                       axis=1, inplace=True)
    # rename Area column to area_old
    maskIntersect.rename(columns={'Area': 'area_old'}, inplace=True)

    logger.info('Clip overlapping polygons...')
    maskClipped = gp.overlay(maskIntersect, dfExtent, how='intersection')

    logger.info('Total time used for clipping %s seconds',
                '{0:.3g}'.format(time.time() - start))
    return maskClipped
def run(self, region: MultiPolygon, period: DateRange, granularity: TimeAggregation, pollutant: Pollutant) -> tuple[DataFrame, GeoDataFrame]: self._validate(region, period, granularity, pollutant) self._state = Status.RUNNING self._progress = 0 # Generate data frame with random emission values per GNFR sector data = self._create_gnfr_table(pollutant) for sector in GNFR: data.loc[sector] = [random() * 100, random() * 18, random() * 22] # Add totals row at the bottom data.loc["Totals"] = data.sum(axis=0) self._progress = 50 # Generate bogus grid with random emission values geo_data, _ = self._create_grid(region, .1, .1, snap=False) geo_data = overlay(geo_data, GeoDataFrame({'geometry': [region]}, crs="EPSG:4326"), how='intersection') geo_data.insert(0, "Area [km²]", geo_data.to_crs(epsg=8857).area / 10**6) # Equal earth projection geo_data.insert(1, f"Total {pollutant.name} emissions [kg]", [random() * 100 for _ in range(geo_data.shape[0])]) geo_data.insert(2, "Umin [%]", 42) geo_data.insert(3, "Umax [%]", 42) geo_data.insert(4, "Number of values [1]", len(period)) geo_data.insert(5, "Missing values [1]", 0) if granularity is TimeAggregation.YEARLY: headings = self._create_column_headings_per_year(period, pollutant) elif granularity is TimeAggregation.MONTHLY: headings = self._create_column_headings_per_month( period, pollutant) else: headings = [ f"{day} {pollutant.name} emissions [kg]" for day in period ] for count, heading in enumerate(headings): geo_data.insert(6 + count, heading, [ random() * 100 / len(headings) for _ in range(geo_data.shape[0]) ]) self._progress = 100 self._state = Status.READY return self._create_result_tuple(data, geo_data)
def overlay():
    polys1 = geopandas.GeoSeries([Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
                                  Polygon([(2, 2), (4, 2), (4, 4), (2, 4)])])
    polys2 = geopandas.GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                                  Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
    df1 = geopandas.GeoDataFrame({'geometry': polys1, 'df1': [1, 2]})
    df2 = geopandas.GeoDataFrame({'geometry': polys2, 'df2': [1, 2]})

    # plot the original layers on top of each other
    ax = df1.plot(color='red')
    df2.plot(ax=ax, color='green', alpha=0.5)
    plt.title('data')

    # union
    res_union = geopandas.overlay(df1, df2, how='union')
    ax = res_union.plot(alpha=0.5, cmap='tab10')
    df1.plot(ax=ax, facecolor='none', edgecolor='k')
    df2.plot(ax=ax, facecolor='none', edgecolor='k')
    plt.title('union')

    # intersection
    res_intersection = geopandas.overlay(df1, df2, how='intersection')
    ax = res_intersection.plot(alpha=0.5, cmap='tab10')
    df1.plot(ax=ax, facecolor='none', edgecolor='k')
    df2.plot(ax=ax, facecolor='none', edgecolor='k')
    plt.title('intersection')

    # symmetric difference
    res_symdiff = geopandas.overlay(df1, df2, how='symmetric_difference')
    ax = res_symdiff.plot(alpha=0.5, cmap='tab10')
    df1.plot(ax=ax, facecolor='none', edgecolor='k')
    df2.plot(ax=ax, facecolor='none', edgecolor='k')
    plt.title('symmetric_difference')

    plt.show()
def test_overlay_overlap(how):
    """
    Overlay test with overlapping geometries in both dataframes.

    Test files are created with::

        import geopandas
        from geopandas import GeoSeries, GeoDataFrame
        from shapely.geometry import Point, Polygon, LineString

        s1 = GeoSeries([Point(0, 0), Point(1.5, 0)]).buffer(1, resolution=2)
        s2 = GeoSeries([Point(1, 1), Point(2, 2)]).buffer(1, resolution=2)

        df1 = GeoDataFrame({'geometry': s1, 'col1': [1, 2]})
        df2 = GeoDataFrame({'geometry': s2, 'col2': [1, 2]})

        ax = df1.plot(alpha=0.5)
        df2.plot(alpha=0.5, ax=ax, color='C1')

        df1.to_file('geopandas/geopandas/tests/data/df1_overlap.geojson',
                    driver='GeoJSON')
        df2.to_file('geopandas/geopandas/tests/data/df2_overlap.geojson',
                    driver='GeoJSON')

    and then the overlay results are obtained using QGIS 2.16
    (Vector -> Geoprocessing Tools -> Intersection / Union / ...),
    saved to GeoJSON.
    """
    df1 = read_file(os.path.join(DATA, 'overlap', 'df1_overlap.geojson'))
    df2 = read_file(os.path.join(DATA, 'overlap', 'df2_overlap.geojson'))

    if how == 'identity':
        pytest.skip()

    result = overlay(df1, df2, how=how)

    expected = read_file(os.path.join(
        DATA, 'overlap', 'df1_df2_overlap-{0}.geojson'.format(how)))

    if how == 'union':
        # the QGIS result has the last row duplicated, so remove it
        expected = expected.iloc[:-1]

    # TODO needed adaptations to result
    result = result.reset_index(drop=True)
    if how == 'union':
        result = result.sort_values(['col1', 'col2']).reset_index(drop=True)

    assert_geodataframe_equal(result, expected, check_column_type=False,
                              check_less_precise=True)
def extract_tile_items(raster_features, labels, min_x, min_y,
                       tile_width, tile_height):
    """Extract label items that belong to the tile defined by the minimum
    horizontal pixel `min_x` (left tile limit), the minimum vertical pixel
    `min_y` (upper tile limit) and the sizes `tile_width` and `tile_height`
    measured as a pixel amount.

    The tile is cropped from the original image raster as follows:
      - horizontally, between `min_x` and `min_x+tile_width`
      - vertically, between `min_y` and `min_y+tile_height`

    This method takes care of the original data projection (UTM 37S, Tanzania
    area); this parameter may be changed if similar data on another
    projection is used.

    Parameters
    ----------
    raster_features : dict
        Raw image raster geographical features (`north`, `south`, `east` and
        `west` coordinates, `width` and `height` measured in pixels)
    labels : geopandas.GeoDataFrame
        Raw image labels, as a set of geometries
    min_x : int
        Left tile limit, as a horizontal pixel index
    min_y : int
        Upper tile limit, as a vertical pixel index
    tile_width : int
        Tile width, measured in pixels
    tile_height : int
        Tile height, measured in pixels

    Returns
    -------
    geopandas.GeoDataFrame
        Set of ground-truth labels contained in the tile, characterized by
        their type (complete, unfinished or foundation) and their geometry
    """
    area = get_tile_footprint(raster_features, min_x, min_y,
                              tile_width, tile_height)
    bdf = gpd.GeoDataFrame(crs=fiona.crs.from_epsg(raster_features["srid"]),
                           geometry=[area])
    reproj_labels = labels.to_crs(epsg=raster_features["srid"])
    tile_items = gpd.sjoin(reproj_labels, bdf)
    if tile_items.shape[0] == 0:
        return tile_items[["condition", "geometry"]]
    tile_items = gpd.overlay(tile_items, bdf)
    tile_items = tile_items.explode()  # Manage MultiPolygons
    return tile_items[["condition", "geometry"]]
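`get_tile_footprint` is referenced above but not defined in this section. A plausible sketch under the assumption that it linearly maps pixel offsets onto the raster's geographic bounds; the project's actual helper may differ:

from shapely import geometry

def get_tile_footprint(raster_features, min_x, min_y, tile_width, tile_height):
    # Hypothetical helper: convert pixel offsets into geographic coordinates
    # by linear interpolation (assumes a north-up, axis-aligned raster).
    x_scale = ((raster_features["east"] - raster_features["west"])
               / raster_features["width"])
    y_scale = ((raster_features["south"] - raster_features["north"])
               / raster_features["height"])
    west = raster_features["west"] + min_x * x_scale
    north = raster_features["north"] + min_y * y_scale
    east = west + tile_width * x_scale
    south = north + tile_height * y_scale
    return geometry.box(west, south, east, north)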
def test_non_overlapping(how): p1 = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]) p2 = Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]) df1 = GeoDataFrame({"col1": [1], "geometry": [p1]}) df2 = GeoDataFrame({"col2": [2], "geometry": [p2]}) result = overlay(df1, df2, how=how) if how == "intersection": expected = GeoDataFrame( { "col1": np.array([], dtype="int64"), "col2": np.array([], dtype="int64"), "geometry": [], }, index=pd.Index([], dtype="object"), ) elif how == "union": expected = GeoDataFrame( { "col1": [1, np.nan], "col2": [np.nan, 2], "geometry": [p1, p2], } ) elif how == "identity": expected = GeoDataFrame( { "col1": [1.0], "col2": [np.nan], "geometry": [p1], } ) elif how == "symmetric_difference": expected = GeoDataFrame( { "col1": [1, np.nan], "col2": [np.nan, 2], "geometry": [p1, p2], } ) elif how == "difference": expected = GeoDataFrame( { "col1": [1], "geometry": [p1], } ) assert_geodataframe_equal(result, expected)
def get_user_data_by_vicinity(): all_user_data_result = get_all_user_data() if not const.HTTP_ERROR: # Set the area proximity geodataframe df_wgs84 = pd.DataFrame({ 'id': [1], 'latitude': [1.23], 'longitude': [-0.213] }) df_wgs84_geom = [ Point(xy) for xy in zip(df_wgs84.longitude, df_wgs84.latitude) ] wgs84_crs = {'init': 'epsg:4326'} geo_df_wgs84 = gpd.GeoDataFrame(df_wgs84, crs=wgs84_crs, geometry=df_wgs84_geom) df_web_mercator = geo_df_wgs84.to_crs(epsg=3857) df_web_mercator['geometry'] = df_web_mercator.geometry.buffer( const.PROXIMITY) # Set the user data geodataframe all_user_df = pd.DataFrame.from_dict(all_user_data_result, orient='columns') all_user_df['longitude'] = pd.to_numeric(all_user_df['longitude'], downcast='float') all_user_df['latitude'] = pd.to_numeric(all_user_df['latitude'], downcast='float') all_user_geom = [ Point(xy) for xy in zip(all_user_df.longitude, all_user_df.latitude) ] all_user_geo_df = gpd.GeoDataFrame(all_user_df, crs=wgs84_crs, geometry=all_user_geom) all_user_geo_df_web_mercator = all_user_geo_df.to_crs(epsg=3857) # Overlay the user data geodataframe with the area proximity geodataframe to retrieve target users data target_users_gdf = gpd.overlay(all_user_geo_df_web_mercator, df_web_mercator, how='intersection') # Set the target users list target_users_id_list = [] for index, row in target_users_gdf.iterrows(): target_users_id_list.append(row['id_1']) target_users_item_list = [] for user_item in all_user_data_result: if user_item['id'] in target_users_id_list: target_users_item_list.append(user_item) return target_users_item_list else: return all_user_data_result
def _generate_ground_truth(w, h, crop_size, annotation_polygon: Polygon,
                           pixel_annotation_value):
    """
    :param w: left pixel coordinate of the patch
    :param h: top pixel coordinate of the patch
    :param crop_size: side length of the (square) patch
    :param annotation_polygon: annotation geometry to rasterize
    :param pixel_annotation_value: value burned into annotated pixels
    :return: the rasterized patch ground truth
    """
    patch_mask_polygon = Polygon([(w, h), (w + crop_size, h),
                                  (w + crop_size, h + crop_size),
                                  (w, h + crop_size)])
    patch_mask_polygon = gpd.GeoSeries(patch_mask_polygon)
    annotation_polygon = gpd.GeoSeries(annotation_polygon)

    # Get the intersection of the patch mask and an annotation.
    # 'patch_mask' would be fed into the GeoDataFrame as the dataset.
    gdf_mask = gpd.GeoDataFrame({
        'geometry': patch_mask_polygon,
        'patch_mask': pixel_annotation_value
    })
    gdf_curr_annotation = gpd.GeoDataFrame({'geometry': annotation_polygon})
    gdf_mask_curr_anno_diff = gpd.overlay(gdf_mask, gdf_curr_annotation,
                                          how='intersection')

    if not gdf_mask_curr_anno_diff.empty:
        # 'geom' works as the bounding box
        mask_curr_anno_intersection_rasterized = \
            make_geocube(vector_data=gdf_mask_curr_anno_diff,
                         resolution=(1., 1.),
                         geom=json.dumps(mapping(box(w, h, w + crop_size,
                                                     h + crop_size))),
                         fill=opt.pixel_anno_ignore)
        # TODO: refactor a transformation of geocube data to numpy array
        intersection_data = mask_curr_anno_intersection_rasterized.to_dict()
        intersection_data = intersection_data['data_vars']['patch_mask']['data']
        patch_ground_truth = np.array(intersection_data)
        return patch_ground_truth

    return np.full((crop_size, crop_size), pixel_annotation_value).astype(float)
def get_country_geometries(country_names=None, extent=None, resolution=10):
    """Returns a GeoDataFrame of Natural Earth multipolygons for the
    specified countries, or for the countries that lie within the specified
    extent. If no arguments are given, simply returns the whole Natural Earth
    dataset.

    Take heed: we assume WGS84 as the CRS unless the Natural Earth download
    utility from cartopy starts including the projection information. (They
    are saving a whopping 147 bytes by omitting it.) Same goes for UTF.

    Parameters:
        country_names (list, optional): list with ISO3 names of countries,
            e.g. ['ZWE', 'GBR', 'VNM', 'UZB']
        extent (tuple, optional): (min_lon, max_lon, min_lat, max_lat)
            assumed to be in the same CRS as the Natural Earth data.
        resolution (float, optional): 10, 50 or 110. Resolution in m.
            Default: 10m

    Returns:
        GeoDataFrame
    """
    resolution = nat_earth_resolution(resolution)
    shp_file = shapereader.natural_earth(resolution=resolution,
                                         category='cultural',
                                         name='admin_0_countries')
    nat_earth = gpd.read_file(shp_file, encoding='UTF-8')

    if not nat_earth.crs:
        nat_earth.crs = NE_CRS

    if country_names:
        if isinstance(country_names, str):
            country_names = [country_names]
        out = nat_earth[nat_earth.ISO_A3.isin(country_names)]
    elif extent:
        bbox = Polygon([
            (extent[0], extent[2]),
            (extent[0], extent[3]),
            (extent[1], extent[3]),
            (extent[1], extent[2])
        ])
        bbox = gpd.GeoSeries(bbox, crs=nat_earth.crs)
        bbox = gpd.GeoDataFrame({'geometry': bbox}, crs=nat_earth.crs)
        out = gpd.overlay(nat_earth, bbox, how="intersection")
    else:
        out = nat_earth

    return out
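For illustration, typical calls under the signature above (the country codes and extent values here are arbitrary examples):

# Select two countries by ISO3 code, then clip the dataset to a lon/lat box.
countries = get_country_geometries(country_names=['ZWE', 'VNM'],
                                   resolution=110)
window = get_country_geometries(extent=(5.9, 10.5, 45.8, 47.8),
                                resolution=110)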
def test_correct_index(dfs): # GH883 - case where the index was not properly reset df1, df2 = dfs polys3 = GeoSeries([ Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]), Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]), Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]), ]) df3 = GeoDataFrame({"geometry": polys3, "col3": [1, 2, 3]}) i1 = Polygon([(1, 1), (1, 3), (3, 3), (3, 1), (1, 1)]) i2 = Polygon([(3, 3), (3, 5), (5, 5), (5, 3), (3, 3)]) expected = GeoDataFrame([[1, 1, i1], [3, 2, i2]], columns=["col3", "col2", "geometry"]) result = overlay(df3, df2, keep_geom_type=True) assert_geodataframe_equal(result, expected)
def intersect_aois(shps): logger.info('Finding intersection among provided polygons...') logger.info('Reading: {}'.format(shps[0])) shp = gpd.read_file(shps[0]) if len(shp) > 1: shp = dissolve_gdf(shp) for s in shps[1:]: logger.info('Reading: {}'.format(s)) shp2 = gpd.read_file(s) if len(shp2) > 1: shp2 = dissolve_gdf(shp2) logger.info('Finding intersection...') shp = gpd.overlay(shp, shp2) return shp
def get_area_and_start_point(): #! load crawl area area = gpd.read_file('../input/area_test.geojson') df_roads = gpd.read_file("../input/深圳市_osm路网_道路.geojson") df_nodes = gpd.read_file("../input/深圳市_osm路网_节点.geojson") roads = gpd.overlay(df_roads, area, how="intersection") nodes = gpd.overlay(df_nodes, area, how="intersection") if False: ax = map_visualize(roads, color='red', scale=0.1) nodes.plot(ax=ax, ) ax.axis('off') ax = map_visualize(roads.set_geometry('start'), color='red', scale=0.1) ax.axis('off') roads.loc[:, 'start'] = roads.geometry.apply( lambda i: Point(i.xy[0][0], i.xy[1][0])) roads.loc[:, 'end'] = roads.geometry.apply( lambda i: Point(i.xy[0][-1], i.xy[1][-1])) roads.loc[:, 'start_bd_mc'] = roads.start.apply( lambda i: wgs_to_bd_mc(*i.coords[0])) return roads.start_bd_mc.values.tolist(), area.loc[0].geometry
def dist_cont(point_df, dist_list, outside, buff_res):
    if point_df.crs != outside.crs:
        print('Point df and Outside df are not the same CRS')
        return None

    # Dissolve the outside area into a single object
    out_cop = outside[['geometry']].copy()
    out_cop['Constant'] = 1
    out_cop = out_cop.dissolve('Constant')

    # Make sure points are inside the area
    inside = point_df.within(out_cop['geometry'][1])
    point_cop = point_df[inside].copy()
    point_cop['Constant'] = 1  # Constant for dissolve
    point_cop = point_cop[['Constant', 'geometry']].copy()

    res_buffers = []
    for i, d in enumerate(dist_list):
        print(f'Doing buffer {d}')
        if i == 0:
            res = dissolve_buff(point_cop, d, buff_res)
            res_buffers.append(res.copy())
        else:
            res_new = dissolve_buff(point_cop, d, buff_res)
            res_buffonly = gpd.overlay(res_new, res, how='difference')
            res = res_new.copy()
            res_buffers.append(res_buffonly.copy())

    # Now take the difference with the larger area
    print('Working on leftover difference now')
    leftover = gpd.overlay(out_cop, res, how='difference')
    res_buffers.append(leftover)
    for i, d in enumerate(dist_list):
        res_buffers[i]['Distance'] = str(d)
    res_buffers[-1]['Distance'] = 'Outside'

    # New geopandas DF
    comb_df = pd.concat(res_buffers)
    comb_df.reset_index(inplace=True, drop=True)
    return comb_df
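`dissolve_buff` is not shown here. A minimal sketch of what it presumably does (buffer every point at the given distance and resolution, then dissolve the buffers into one geometry), offered as an assumption:

import geopandas as gpd

def dissolve_buff(point_df, dist, buff_res):
    # Hypothetical helper: buffer each point, then dissolve the buffers
    # on the constant column into a single (multi)polygon.
    buff = point_df.copy()
    buff['geometry'] = buff.geometry.buffer(dist, resolution=buff_res)
    return buff.dissolve('Constant').reset_index()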
def stackUnion(add, stack, thing, sliver_size=0.001):
    # Prepare to fail
    failures = []
    backup = stack.copy()

    # Ensure the geometries are all valid
    add = ensureValid(add)
    stack = ensureValid(stack)

    # Union the new layer to the overlay
    try:
        try:
            stack = gpd.overlay(add, stack, "union")
        except Exception:
            stack = roundCoords(stack, 5)
            try:
                stack = gpd.overlay(add, stack, "union")
            except Exception:
                add = roundCoords(add, 5)
                stack = gpd.overlay(ensureValid(add), ensureValid(stack),
                                    "union")

        # Round the coordinates
        stack = roundCoords(stack, 5)
        print(f" Added {thing}{' '*(21-len(thing))}{now()}")
    except Exception as e:
        failures.append(thing)
        print(e)
        print(f"--- FAILED TO ADD {thing} ---------\n")
        return backup, failures

    # Return the new union
    stack = ensureValid(stack)
    return stack, failures
def overlay_grid_and_plants(gdf, grid, img_path): gdf_rd = gdf.to_crs(epsg=28992) #create new columns grid['id'] = range(len(grid)) grid['area'] = np.nan #intersect data and grid intersect = gpd.overlay(grid, gdf_rd, how='intersection') #calculate area per grid cell and write area to column for ID in intersect.id: temp = intersect[intersect['id'] == ID] area = temp.geometry.area.sum() grid.loc[ID, 'area'] = float(area) #write grid to file as output grid.to_file(os.path.dirname(img_path) + '/plant_area_grid.shp') return grid
def filter_map(gdf, bbox=[0, 0, 180, 90]):
    # corner coordinates of the bounding box (x, y order)
    np1 = (bbox[0], bbox[3])
    np2 = (bbox[2], bbox[3])
    np3 = (bbox[2], bbox[1])
    np4 = (bbox[0], bbox[1])
    bbox_gdf = gpd.GeoDataFrame(gpd.GeoSeries(Polygon([np1, np2, np3, np4])),
                                columns=['geometry'], crs=gdf.crs)
    return gpd.overlay(gdf, bbox_gdf, how='intersection')
def overlap_calc(_id, poly, grid_file, weight, service_type):
    value_dict = Counter()
    if poly.iloc[0][service_type] is not None:
        value = float(poly[service_type]) * weight
        intersect = gpd.overlay(grid_file, poly, how='intersection')
        intersect['overlapped'] = intersect.area
        intersect['percent'] = intersect['overlapped'] / intersect['area']
        intersect = intersect[intersect['percent'] >= 0.5]
        intersect_region = intersect['id']
        for intersect_id in intersect_region:
            value_dict[intersect_id] += value
    return (_id, value_dict)
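`overlap_calc` divides by an `area` column that must already exist on `grid_file` before the intersection. A hedged sketch of the presumed preparation; `prepare_grid` is a hypothetical name:

import geopandas as gpd

def prepare_grid(grid_file: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    # Assumed preparation: each cell carries an `id` and its full `area`,
    # so the post-intersection overlap percentage can be computed.
    grid_file = grid_file.copy()
    grid_file['id'] = range(len(grid_file))
    grid_file['area'] = grid_file.geometry.area
    return grid_file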
def test_keep_geom_type_geometry_collection2(): polys1 = [ box(0, 0, 1, 1), box(1, 1, 3, 3).union(box(1, 3, 5, 5)), ] polys2 = [ box(0, 0, 1, 1), box(3, 1, 4, 2).union(box(4, 1, 5, 4)), ] df1 = GeoDataFrame({"left": [0, 1], "geometry": polys1}) df2 = GeoDataFrame({"right": [0, 1], "geometry": polys2}) result1 = overlay(df1, df2, keep_geom_type=True) expected1 = GeoDataFrame( { "left": [0, 1], "right": [0, 1], "geometry": [box(0, 0, 1, 1), box(4, 3, 5, 4)], } ) assert_geodataframe_equal(result1, expected1) result1 = overlay(df1, df2, keep_geom_type=False) expected1 = GeoDataFrame( { "left": [0, 1, 1], "right": [0, 0, 1], "geometry": [ box(0, 0, 1, 1), Point(1, 1), GeometryCollection([box(4, 3, 5, 4), LineString([(3, 1), (3, 2)])]), ], } ) assert_geodataframe_equal(result1, expected1)
def filter_poly(
    poly_pieces_path, markup_path,
    pieces_info_path, original_image_path,
    image_pieces_path, mask_pieces_path,
    pxl_size_threshold, pass_chance
):
    original_image = rs.open(original_image_path)

    geojson_markup = gp.read_file(markup_path)
    geojson_markup = geojson_markup.to_crs(original_image.crs)

    pieces_info = pd.read_csv(pieces_info_path)

    for i in tqdm(range(len(pieces_info))):
        poly_piece_name = pieces_info['piece_geojson'][i]
        start_x = pieces_info["start_x"][i]
        start_y = pieces_info["start_y"][i]

        x, y = original_image.transform * (start_x + 1, start_y + 1)

        filename, _ = os.path.splitext(poly_piece_name)
        try:
            poly_piece = gp.read_file(os.path.join(poly_pieces_path,
                                                   poly_piece_name))
        except fiona.errors.DriverError:
            print('Polygon not found.')
            remove_piece(filename, poly_pieces_path,
                         image_pieces_path, mask_pieces_path)
            continue

        if random() < pass_chance:
            continue

        intersection = gp.overlay(geojson_markup, poly_piece,
                                  how='intersection')
        adjacency_list = compose_adjacency_list(intersection['geometry'])
        components = get_components(intersection['geometry'], adjacency_list)

        multi_polys = [MultiPolygon(list(component))
                       for component in components]

        png_file = os.path.join(mask_pieces_path, filename + '.png')
        if len(multi_polys) == 0 \
                or (imageio.imread(png_file)).sum() < 255 * pxl_size_threshold:
            remove_piece(filename, poly_pieces_path,
                         image_pieces_path, mask_pieces_path)
def find_neighbors_in_shape_file(paths, existing_neighbors):
    """This function finds the neighbors in the shape file. Max-p does not
    always figure out the correct neighbors: some clusters are physical
    neighbors but are not recognized as such. This function corrects that.

    :param paths: dictionary of paths; paths["parts_max_p"] points to the
        folder with the combined max-p shapefile.
    :param existing_neighbors: the neighbors matrix created by using w and
        knn. The new neighbors are added to this matrix.
    """
    df = gpd.read_file(paths["parts_max_p"] + 'max_p_combined.shp')
    df["NEIGHBORS"] = None

    for index, cluster_number in df.iterrows():
        # get 'not disjoint' clusters
        df1 = df
        df1.crs = {'init': 'epsg:4326'}
        df1 = df1.to_crs({'init': 'epsg:32662'})
        df2 = cluster_number.to_frame().T
        df2 = gpd.GeoDataFrame(df2, geometry='geometry')
        df2.crs = {'init': 'epsg:4326'}
        df2 = df2.to_crs({'init': 'epsg:32662'})
        df2.geometry = df2.geometry.buffer(100)  # in m
        test = gpd.overlay(df1, df2, how='intersection')
        test['area'] = test['geometry'].area / 10 ** 6  # in km²
        # avoids that neighbors share only a point or a very small area
        test = test[test['area'] > 0.01]
        neighbors = test.CL_1.tolist()

        # remove own name from the list
        neighbors = [cl_no for cl_no in neighbors
                     if cluster_number.CL != cl_no]

        # add names of neighbors as NEIGHBORS value
        df.at[index, "NEIGHBORS"] = ','.join(str(n) for n in neighbors)

    # Making the w.neighbors dictionary for replacing it in max_p_algorithm_2.
    neighbors_corrected = dict()
    for index, row in df.iterrows():
        neighbors_for_one = row['NEIGHBORS'].split(',')
        neighbors_int = list()
        for neighbor in neighbors_for_one:
            if neighbor:
                neighbors_int.append(int(neighbor))
        neighbors_corrected[index] = neighbors_int
        for value in existing_neighbors[index]:
            if value not in neighbors_corrected[index]:
                neighbors_corrected[index].append(value)
        neighbors_corrected[index] = sorted(neighbors_corrected[index])

    return neighbors_corrected
def drawSectors(center, radius, sectors, start, steps):
    end = 360 + start  # end of circle in degrees

    # prepare parameters
    if start > end:
        start = start - 360

    step_angle_width = (end - start) / steps
    sector_width = (end - start) / sectors
    steps_per_sector = int(math.ceil(steps / sectors))

    features = []
    for x in range(0, int(sectors)):
        segment_vertices = []

        # first the center and first point
        segment_vertices.append(polar_point(center, 0, 0))
        segment_vertices.append(
            polar_point(center, start + x * sector_width, radius))

        # then the sector outline points
        for z in range(1, steps_per_sector):
            segment_vertices.append(
                polar_point(center,
                            start + x * sector_width + z * step_angle_width,
                            radius))

        # then again the center point to finish the polygon
        segment_vertices.append(
            polar_point(center, start + x * sector_width + sector_width,
                        radius))
        segment_vertices.append(polar_point(center, 0, 0))

        # create feature
        features.append(Polygon(segment_vertices))

    polys2 = gpd.GeoSeries(features)
    global df2
    df2 = gpd.GeoDataFrame({'geometry': polys2, 'id': range(sectors)})
    df2.to_file("./output/sectors.shp")

    global res  # result shapefile
    res = gpd.overlay(df2, df1, how='intersection')
    # res_union = gpd.overlay(df1, df2, how='union')
    res.to_file("./output/result.shp")
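`polar_point` is assumed to convert a bearing (in degrees) and a radius from the center into an (x, y) vertex. A minimal sketch under that assumption, with `center` taken to be a shapely Point:

import math

def polar_point(center, angle, radius):
    # Hypothetical helper: vertex at `radius` from `center` along `angle`
    # degrees; angle 0 with radius 0 simply returns the center itself.
    return (center.x + math.sin(math.radians(angle)) * radius,
            center.y + math.cos(math.radians(angle)) * radius)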
def image_to_walls(img):
    """From an image, retrieve the walls. Aimed at usage for the structure
    input images.

    Args:
        img (np.array): Input image from structure dataset.

    Returns:
        geodataframe with the walls as a single polygon.
    """
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    contours, _ = cv2.findContours(gray_img, cv2.RETR_LIST,
                                   cv2.CHAIN_APPROX_SIMPLE)
    poly_list = [transform_contour_to_polygon(_contour)
                 for _contour in contours]
    poly_list = [x for x in poly_list if x]

    rooms = gpd.GeoDataFrame(geometry=poly_list)
    rooms["area"] = rooms.area
    rooms = rooms.sort_values("area", ascending=False)
    # drop the largest area (the area outside of the floor)
    rooms = rooms.tail(-1).reset_index(drop=True)

    # find floors:
    floors = (
        gpd.GeoDataFrame(geometry=[rooms.unary_union])
        .explode()
        .reset_index(drop=True)
    )

    # find best overlapping areas so they can be removed.
    best_matches = {}
    for i, _floor in floors.iterrows():
        _floor = _floor.geometry
        best_overlap = 0
        for j, _room in rooms.iterrows():
            _room = _room.geometry
            if _floor.intersects(_room):
                overlap = _floor.intersection(_room).area / _floor.area * 100
                if overlap > best_overlap:
                    best_matches[i] = j
                    best_overlap = overlap

    rooms = rooms.drop(list(best_matches.values()), axis=0)
    res_union = gpd.overlay(floors, rooms, how="difference")
    res_union.geometry = res_union.scale(yfact=-1, origin=(0, 0))
    return res_union
def buffer_point_polygon_overlay(df, buff_dist=2000, method='difference',
                                 oid_fld='NewID', erase_shp_files=[]):
    """Buffer the point geometries in <df> and erase overlapping geometries
    from the buffers.

    Parameters
    ---------------------------------
    df: a GeoPandas GeoDataFrame
        This is a GeoDataFrame which contains the point geometries.
    buff_dist: buffer distance, in the units of the GeoDataFrame's CRS
    method: see the GeoPandas overlay doc for the how= keyword
    oid_fld: field containing the value to assign each geometry created
        within a village
    erase_shp_files: list containing paths to additional shapefiles whose
        geometries are erased from the buffered household geometries

    Usage Notes
    ---------------------------------
    """
    # Replace the dataframe geometries with buffers
    df['geometry'] = df['geometry'].buffer(buff_dist)
    new_df = df.copy()

    # Iteratively erase the additional geometries
    for erase_shp in erase_shp_files:
        # load the new shapefile and make sure it has the same
        # spatial reference
        temp_df = gpd.read_file(erase_shp)
        print(temp_df.crs, df.crs)
        assert temp_df.crs['init'] == df.crs['init']
        # erase the geometry from the buffered point dataframe
        new_df = gpd.overlay(new_df, temp_df, how=method)

    # ensure same CRS
    new_df.crs = df.crs
    return new_df
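A hedged usage sketch of the function above; the file names are placeholders, not paths from the source project:

import geopandas as gpd

# Buffer household points by 2 km, then erase water bodies from the buffers;
# 'households.shp' and 'water.shp' are placeholder inputs.
points = gpd.read_file('households.shp')
buffers = buffer_point_polygon_overlay(points, buff_dist=2000,
                                       erase_shp_files=['water.shp'])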
def calculate_kde(clean_sur,years): contour_collection=[] for year in years: #get year data kdev = str_to_class("kdeclus"+(str(year))) year_data = str(kdev.objects.filter(surname=clean_sur).values('kde')) #prepare data idx = [int(x) for x in year_data.split(';')[0][21:].split(',')] kdx = [int(x) for x in year_data.split(';')[1][:-5].split(',')] #pd dataframe kdf = pd.DataFrame({'gid':idx,'val':kdx}) kdf = kdf[(kdf['val'] <= level)] #add values to grid kde = pd.merge(gridc,kdf,on='gid',how='inner') coord = [[int(x[1]),int(x[0])] for x in (list(zip(kde.x,kde.y)))] cs,lbls = dbscan(coord,eps=2000) kde = kde.copy() kde['group'] = lbls kde = kde[(kde['group'] >= 0)] #group to concave points contourp = to_concave_points(kde,coord) #clip contours = gpd.GeoSeries([Polygon(contour) for contour in contourp if len(contour) >= 3]) contours = gpd.GeoDataFrame({'geometry': contours}) contours.crs = from_epsg(27700) clp_prj = gpd.overlay(uk,contours,how='intersection') #smooth and project clp_prj['geometry'] = clp_prj.geometry.buffer(10000,join_style=1).buffer(-10000,join_style=1) clp_prj['geometry'] = clp_prj['geometry'].to_crs(epsg=4326) #to json contourprj = clp_prj.to_json() #add to collection data = [] data.append(year) data.append(contourprj) contour_collection.append(data) #return return(contour_collection)
def allocate_population_to_raster(self): pop_shp = gpd.read_file(self.filepath.root_tmp_path + 'pop_shp.shp') in_rst_fn = self.filepath.root_work_path + 'dem_aggr_rst.tif' rst_shp = self.polygonize_raster_layer(in_rst_fn) # rst_shp = gpd.read_file(self.filepath.root_work_path + 'rst_shp.shp') res_intersection = gpd.overlay(rst_shp, pop_shp, how='intersection') # res_intersection.to_file(self.filepath.root_work_path + 'intersection_shp.shp') # res_intersection = gpd.read_file(self.filepath.root_work_path + 'intersection_shp.shp') original_size = pop_shp.area[0] res_intersection['TOT_P'] = res_intersection['TOT_P'] * ( res_intersection.area / original_size) if in_rst_fn not in self.raster_metadata: self.load_raster_metadata(in_rst_fn) out_rst_fn = self.filepath.root_work_path + 'pop_rst.tif' with rasterio.open(out_rst_fn, 'w+', **self.raster_metadata[in_rst_fn]) as out_rst: out_rst_data = out_rst.read(1) shapes = ((geom, value) for geom, value in zip( res_intersection.geometry, res_intersection.shape[0] * [0]) if features.is_valid_geom(geom)) burned = features.rasterize(shapes=shapes, fill=0, out=out_rst_data, transform=out_rst.transform, all_touched=True, merge_alg=MergeAlg.replace) out_rst.write_band(1, burned) out_rst_data = out_rst.read(1) shapes = ((geom, value) for geom, value in zip( res_intersection.geometry, res_intersection.TOT_P) if features.is_valid_geom(geom)) burned = features.rasterize(shapes=shapes, fill=0, out=out_rst_data, transform=out_rst.transform, all_touched=True, merge_alg=MergeAlg.add) out_rst.write_band(1, burned) return
def intersect_grid_with_habitat(self, grid_gdf, hab_gdf):
    """Returns list of geodataframes where grid_gdf intersects hab_gdf"""
    with open('missing_joins.txt', 'w+') as f:
        for _, grid_tile in grid_gdf.iterrows():
            tile = gpd.GeoDataFrame({'OrthoID': [grid_tile['OrthoID']],
                                     'geometry': [grid_tile['geometry']]})
            tile.crs = {'init': 'epsg:32639'}
            gdf_sub = gpd.overlay(tile, hab_gdf, how='intersection')
            tile_folder = self.out_folder.joinpath(f'{grid_tile["OrthoID"]}')
            if not tile_folder.exists():
                tile_folder.mkdir(parents=True, exist_ok=True)
            outfile = tile_folder.joinpath(f'{grid_tile["OrthoID"]}.shp')
            gdf_sub.crs = {'init': 'epsg:32639'}
            if not gdf_sub.empty:
                gdf_sub.to_file(outfile)
            else:
                f.write(grid_tile['OrthoID'] + '\n')
def test_intersection(self): df = overlay(self.polydf, self.polydf2, how="intersection") assert df['BoroName'][0] is not None assert df.shape == (68, 7)
def test_geoseries_warning(dfs): df1, df2 = dfs # Issue #305 with pytest.raises(NotImplementedError): overlay(df1, df2.geometry, how="union")
def test_duplicate_column_name(dfs): df1, df2 = dfs df2r = df2.rename(columns={'col2': 'col1'}) res = overlay(df1, df2r, how="union") assert ('col1_1' in res.columns) and ('col1_2' in res.columns)
def test_identity(self): df = overlay(self.polydf, self.polydf2, how="identity") assert df.shape == (154, 7)
def test_bad_how(dfs): df1, df2 = dfs with pytest.raises(ValueError): overlay(df1, df2, how="spandex")
def test_duplicate_column_name(self): polydf2r = self.polydf2.rename(columns={'value2': 'Shape_Area'}) df = overlay(self.polydf, polydf2r, how="union") self.assertTrue('Shape_Area_2' in df.columns and 'Shape_Area' in df.columns)
def time_overlay(self, op): overlay(self.countries, self.capitals, how=op)
def test_identity(self):
    df = overlay(self.polydf, self.polydf2, how="identity")
    self.assertEqual(df.shape, (154, 7))
def test_symmetric_difference(self):
    df = overlay(self.polydf, self.polydf2, how="symmetric_difference")
    self.assertEqual(df.shape, (122, 7))
def test_nonpoly(self): with pytest.raises(TypeError): overlay(self.pointdf, self.polydf, how="union")
def test_intersection(self):
    df = overlay(self.polydf, self.polydf2, how="intersection")
    self.assertIsNotNone(df['BoroName'][0])
    self.assertEqual(df.shape, (68, 7))
def test_symmetric_difference(self): df = overlay(self.polydf, self.polydf2, how="symmetric_difference") assert df.shape == (122, 7)
def time_overlay(self, op): overlay(self.df1, self.df2, how=op)
def test_difference(self):
    df = overlay(self.polydf, self.polydf2, how="difference")
    self.assertEqual(df.shape, (86, 7))
def test_bad_how(self): with pytest.raises(ValueError): overlay(self.polydf, self.polydf, how="spandex")
def test_difference(self): df = overlay(self.polydf, self.polydf2, how="difference") assert df.shape == (86, 7)
def f(): overlay(self.polydf, self.polydf2.geometry, how="union")
def test_union(self):
    df = overlay(self.polydf, self.polydf2, how="union")
    self.assertTrue(type(df) is GeoDataFrame)
    self.assertEqual(df.shape, self.union_shape)
    self.assertTrue('value1' in df.columns and 'Shape_Area' in df.columns)