def test_drop_duplicates_frame():
    # currently, dropping duplicates in a geodataframe produces a TypeError
    # better behavior would be dropping the duplicated points
    gdf_len = 3
    dup_gdf = GeoDataFrame({'geometry': [Point(0, 0) for _ in range(gdf_len)],
                            'value1': range(gdf_len)})
    dropped_geometry = dup_gdf.drop_duplicates(subset="geometry")
    assert len(dropped_geometry) == 1
    dropped_all = dup_gdf.drop_duplicates()
    assert len(dropped_all) == gdf_len
def test_drop_duplicates_frame():
    # duplicated does not yet use EA machinery, see above
    gdf_len = 3
    dup_gdf = GeoDataFrame(
        {"geometry": [Point(0, 0) for _ in range(gdf_len)], "value1": range(gdf_len)}
    )
    dropped_geometry = dup_gdf.drop_duplicates(subset="geometry")
    assert len(dropped_geometry) == 1
    dropped_all = dup_gdf.drop_duplicates()
    assert len(dropped_all) == gdf_len
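# A minimal standalone sketch of the behavior both tests above assert,
# runnable against a recent geopandas release; the imports are an assumption
# of this sketch, not part of the original tests.
from shapely.geometry import Point
from geopandas import GeoDataFrame

gdf = GeoDataFrame({'geometry': [Point(0, 0)] * 3, 'value1': range(3)})
assert len(gdf.drop_duplicates(subset='geometry')) == 1  # identical points collapse to one row
assert len(gdf.drop_duplicates()) == 3  # distinct 'value1' values keep every row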
def livnehIDsAndAreas(df: geopandas.GeoDataFrame, crs: str = '4326') -> geopandas.GeoDataFrame:
    # clipped data
    df.drop_duplicates(['id'], inplace=True)
    df.sort_values(['id'], axis=0, inplace=True)
    df = df.to_crs(epsg=crs)
    # note: .area is only meaningful in a projected CRS; with the default
    # EPSG:4326 these values come out in square degrees, not square metres
    df['area_m2'] = df['geometry'].area
    df = df.filter(items=['coordinates', 'lat', 'lon', 'id', 'area_m2'])
    df = __points2grids(df, crs=crs)
    df = df.to_crs(epsg=crs)
    df['total_area_m2'] = df['geometry'].area
    return df
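# Because the function above labels its outputs area_m2, the usual pattern is
# to reproject to an equal-area CRS before calling .area. A minimal sketch
# with made-up coordinates; EPSG:6933 is one such equal-area CRS.
import geopandas
from shapely.geometry import Point

cells = geopandas.GeoDataFrame({'id': [1]},
                               geometry=[Point(-120.0, 40.0).buffer(0.5)],
                               crs='EPSG:4326')
area_m2 = cells.to_crs(epsg=6933).area  # square metres, unlike .area in EPSG:4326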
def PlnResultsIntegrates(refname, px, py, epsg, wannaview=False, tfac=1):
    '''
    Integrates PLN results into a single file and exports a shapefile

    refname (string) Scenario suffix name to process
    '''
    # get list of files
    fnames = glob(refname + '*.PLN*')
    N = 100 / float(len(fnames))
    Fgdf = None
    # loop over the file names
    for fname in fnames:
        # reads pln file
        # if the final gdf already exists, accumulate this file into it
        if Fgdf is not None:
            try:
                # converts file to dataframe
                gdf = ReadPLN(fname, px, py, epsg, tfac=tfac)
                gdf['prob001'] = (gdf['thickness'].values > 0.01) * 1
                gdf['prob01'] = (gdf['thickness'].values > 0.1) * 1
                gdf['prob1'] = (gdf['thickness'].values > 1) * 1
                gdf['prob10'] = (gdf['thickness'].values > 10) * 1
                # concatenates
                Fgdf = GeoDataFrame(pd.concat([Fgdf, gdf]))
                # gets co-occurrence counts
                prob = Fgdf.groupby(['x', 'y'], as_index=False).sum()
                prob = prob.sort_values(['x', 'y'])
                # gets maximum thickness
                thickness = Fgdf.groupby(['x', 'y'], as_index=False).max()
                thickness = thickness.sort_values(['x', 'y'])
                Fgdf = Fgdf.drop_duplicates(['x', 'y'])
                Fgdf = Fgdf.sort_values(['x', 'y'])
                Fgdf['thickness'] = thickness['thickness'].values
                Fgdf['prob001'] = prob['prob001'].values
                Fgdf['prob01'] = prob['prob01'].values
                Fgdf['prob1'] = prob['prob1'].values
                Fgdf['prob10'] = prob['prob10'].values
            except Exception:
                print('error in scenario {}'.format(fname))
        else:
            try:
                # creates the final dataframe from the first readable file;
                # tfac is passed here as well so the first file is scaled
                # consistently with the others
                Fgdf = ReadPLN(fname, px, py, epsg, wannaview=wannaview, tfac=tfac)
                Fgdf['prob001'] = (Fgdf['thickness'].values > 0.01) * 1
                Fgdf['prob01'] = (Fgdf['thickness'].values > 0.1) * 1
                Fgdf['prob1'] = (Fgdf['thickness'].values > 1) * 1
                Fgdf['prob10'] = (Fgdf['thickness'].values > 10) * 1
            except Exception:
                print('error in scenario {}'.format(fname))
                # start over if the first file could not be fully processed
                Fgdf = None
    Fgdf['prob001'] = Fgdf['prob001'].values * N
    Fgdf['prob01'] = Fgdf['prob01'].values * N
    Fgdf['prob1'] = Fgdf['prob1'].values * N
    Fgdf['prob10'] = Fgdf['prob10'].values * N
    return Fgdf
def load_crime_stats(population_group=None, crime_list=None, provence=None):
    # lowercase the province name
    if provence is not None:
        provence = provence.lower()
    # get data set dir
    data_path = get_work_path()
    # load and clean police stats
    police_stats = clean_police_stats(
        data_path.joinpath('Police_Statistics___2005_-_2017.csv'))
    if crime_list is not None:
        police_stats = police_stats[police_stats['Crime'].isin(crime_list)]
    if provence is not None:
        police_stats = police_stats.query(f"Province == '{provence}'")
    # population shape file
    pop_stats = clean_popluation_stats(
        data_path.joinpath(
            'population/geo_export_3ec3ac74-ddff-4220-8007-b9b5643f79af.shp'))
    base_group = ['sal_code_i', 'pr_name', 'sp_name', 'geometry']
    if population_group is not None:
        # filter out columns
        pop_stats = pop_stats[pop_groups[population_group] + base_group]
    if provence is not None:
        pop_stats = pop_stats.query(f"pr_name == '{provence}'")
    # small-area shape ids to precinct weights
    precinct = clean_area_2_precint(
        data_path.joinpath('Precinct_to_small_area_weights.csv'))
    # munge data
    df = merge(precinct, pop_stats, left_on='small_area', right_on='sal_code_i')
    df = merge(df, police_stats, left_on='precinct', right_on='Police Station')
    # calculate crime per shape as a proportion of the precinct weight
    df['total_crime'] = df.weight * df.Incidents
    # keep as geo-dataframe
    df = GeoDataFrame(df, crs=pop_stats.crs)
    # clean data frame
    df = df.drop([
        'sal_code_i', 'pr_name', 'sp_name', 'Police Station', 'Incidents',
        'weight'
    ], axis=1)
    # aggregate precincts back into shapes
    temp_df = df.groupby(['small_area', 'Year',
                          'Crime'])[['total_crime']].sum().round()
    df = df.drop_duplicates(subset=['small_area', 'Year', 'Crime']).drop(
        ['total_crime'], axis=1)
    df = merge(df, temp_df, on=['small_area', 'Year', 'Crime'])
    return df
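# The core reallocation step of the function above (precinct incidents split
# across small areas by weight), as a self-contained sketch; the column names
# mirror the function but the numbers are made up.
import pandas as pd

weights = pd.DataFrame({'precinct': ['A', 'A'],
                        'small_area': [1, 2],
                        'weight': [0.25, 0.75]})
crimes = pd.DataFrame({'Police Station': ['A'], 'Incidents': [100]})
df = weights.merge(crimes, left_on='precinct', right_on='Police Station')
df['total_crime'] = df.weight * df.Incidents  # 25 and 75 incidents apportioned by weight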
def _merge_vector_feature(eopatches, feature):
    """Merges GeoDataFrames of a vector feature."""
    dataframes = _extract_feature_values(eopatches, feature)

    if len(dataframes) == 1:
        return dataframes[0]

    crs_list = [dataframe.crs for dataframe in dataframes if dataframe.crs is not None]
    if not crs_list:
        crs_list = [None]
    if not _all_equal(crs_list):
        raise ValueError(f"Cannot merge feature {feature} because dataframes are defined for different CRS")

    merged_dataframe = GeoDataFrame(pd.concat(dataframes, ignore_index=True), crs=crs_list[0])
    merged_dataframe = merged_dataframe.drop_duplicates(ignore_index=True)
    # In the future, support for vector operations could be added here

    return merged_dataframe
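# Stripped of the EOPatch plumbing, the merge above reduces to concatenating
# frames that share a CRS and deduplicating; a minimal standalone sketch
# (geopandas and shapely assumed):
import pandas as pd
from geopandas import GeoDataFrame
from shapely.geometry import Point

a = GeoDataFrame({'geometry': [Point(0, 0)]}, crs='EPSG:4326')
b = GeoDataFrame({'geometry': [Point(0, 0), Point(1, 1)]}, crs='EPSG:4326')
merged = GeoDataFrame(pd.concat([a, b], ignore_index=True), crs=a.crs)
merged = merged.drop_duplicates(ignore_index=True)  # the shared Point(0, 0) survives once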
def remove_truly_duplicated_geometries(data: geopandas.GeoDataFrame):
    return data.drop_duplicates("geometry")
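# One caveat worth noting for the helper above: drop_duplicates appears to
# compare geometries by their exact coordinate representation rather than by
# geometric equality, so two shapes covering the same area but listing their
# vertices in a different order are both kept. A sketch of that edge case:
import geopandas
from shapely.geometry import Polygon

p1 = Polygon([(0, 0), (1, 0), (1, 1)])
p2 = Polygon([(1, 0), (1, 1), (0, 0)])  # same triangle, different starting vertex
gdf = geopandas.GeoDataFrame(geometry=[p1, p2])
assert p1.equals(p2)  # geometrically identical
assert len(gdf.drop_duplicates('geometry')) == 2  # yet neither row is dropped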
inv = pd.read_excel(choice + '/islims_inventory.xlsx')  # (see 0.0a iSlims and City Work Data)
wo = pd.read_excel(choice + '/islims_workorders.xlsx')  # (see 0.0a iSlims and City Work Data)
NCR2 = pd.read_excel(choice + '/NCR.xlsx')  # (from 0.0a)
DCR = pd.read_excel(choice + '/DCR.xlsx')  # (from 0.0a)

wo = wo.rename(columns={'woID': 'WoID'})
isf_wo = pd.merge(isf, wo, how='left', on='WoID')
isf_wo = isf_wo.drop(['srchAssetID', 'gpscoordinateX', 'gpscoordinateY', 'initialproblemID',
                      'resolveddatetime', 'entereddate', 'finalresolutionID'], axis=1)
isf_wo_inv = pd.merge(isf_wo, inv, how='left', on='inventoryID')
isf_wo_inv = isf_wo_inv.drop(['gpscoordinateX', 'gpscoordinateY'], axis=1)

# Setting up data into geopandas
geometry = [Point(xy) for xy in zip(isf_wo_inv['gpsX'], isf_wo_inv['gpsY'])]
gLights2 = GeoDataFrame(isf_wo_inv, geometry=geometry)
gLights2 = gLights2.drop_duplicates(subset=['WoID'])

geometry = [Point(xy) for xy in zip(NCR2['X'], NCR2['Y'])]
gNCR2 = GeoDataFrame(NCR2, geometry=geometry)

BUFFER = .000625  # 1/4 of a city block in radius, in Maryland's degree coordinates.
# BUFFER = .00125  # 1/2 of a city block in radius, in Maryland's degree coordinates.

# Overwrites the geometry column with a buffer centered at each light's point,
# i.e. applies geometry.buffer(BUFFER) to every row of gLights2.
gLights_Buff2 = gLights2.assign(geometry=lambda x: x.geometry.buffer(BUFFER))
Matched_NLights = gpd.sjoin(gLights_Buff2, gNCR2, 'left')

Matched_NLights['Crime_LO_intime'] = [0] * len(Matched_NLights)  # Counter to be used
Matched_NLights = Matched_NLights.dropna(subset=['WoCompleted'])
Matched_NLights = Matched_NLights.dropna(subset=['REPORT_DAT'])
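# The buffer-then-spatial-join pattern used above, as a minimal self-contained
# sketch with toy coordinates (the names are illustrative, not from the
# original script):
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point

lights = GeoDataFrame({'WoID': [1]}, geometry=[Point(0, 0)])
crimes = GeoDataFrame({'crime_id': [10, 11]},
                      geometry=[Point(0.0004, 0.0), Point(1.0, 1.0)])
buffered = lights.assign(geometry=lambda x: x.geometry.buffer(0.000625))
matched = gpd.sjoin(buffered, crimes, how='left')  # crime 10 falls inside the buffer; 11 does not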