def test_drop_duplicates_frame():
    # dropping duplicates on the geometry column should keep a single point;
    # the full-row drop keeps every row, since 'value1' differs between rows
    gdf_len = 3
    dup_gdf = GeoDataFrame({'geometry': [Point(0, 0) for _ in range(gdf_len)],
                            'value1': range(gdf_len)})
    dropped_geometry = dup_gdf.drop_duplicates(subset="geometry")
    assert len(dropped_geometry) == 1
    dropped_all = dup_gdf.drop_duplicates()
    assert len(dropped_all) == gdf_len
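
The test assumes the geopandas test-suite imports are already in scope; a minimal standalone setup (the import lines are an assumption, not part of the original test module) might look like:

# assumed imports for running the snippet outside the geopandas test suite
from shapely.geometry import Point
from geopandas import GeoDataFrame

test_drop_duplicates_frame()  # should pass on recent geopandas releases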
Example #3
def test_drop_duplicates_frame():
    # GeoDataFrame.duplicated does not yet use the ExtensionArray (EA) machinery
    gdf_len = 3
    dup_gdf = GeoDataFrame(
        {"geometry": [Point(0, 0) for _ in range(gdf_len)], "value1": range(gdf_len)}
    )
    dropped_geometry = dup_gdf.drop_duplicates(subset="geometry")
    assert len(dropped_geometry) == 1
    dropped_all = dup_gdf.drop_duplicates()
    assert len(dropped_all) == gdf_len
Example #4
def livnehIDsAndAreas(df: geopandas.GeoDataFrame, crs: str = '4326') -> geopandas.GeoDataFrame:
    '''
    Deduplicates Livneh grid points by id and computes per-cell areas.

    Note: the *_m2 columns are only in square metres when a projected
    (metric) EPSG code is passed; the default 4326 yields square degrees.
    '''
    # clipped data: keep one row per grid id, in id order
    df.drop_duplicates(['id'], inplace=True)
    df.sort_values(['id'], axis=0, inplace=True)
    df = df.to_crs(epsg=crs)
    df['area_m2'] = df['geometry'].area
    df = df.filter(items=['coordinates', 'lat', 'lon', 'id', 'area_m2'])
    df = __points2grids(df, crs=crs)
    df = df.to_crs(epsg=crs)
    df['total_area_m2'] = df['geometry'].area
    return df
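
Since .area returns square degrees when the frame is in EPSG:4326, the m2 column names only hold for a metric CRS. A minimal sketch of measuring in a projected, equal-area CRS (EPSG:6933 and the toy cells are assumptions for illustration):

import geopandas
from shapely.geometry import box

# two made-up grid cells in geographic coordinates
cells = geopandas.GeoDataFrame(
    {'id': [1, 2]},
    geometry=[box(0, 0, 0.5, 0.5), box(0.5, 0, 1.0, 0.5)],
    crs='EPSG:4326',
)
# project to an equal-area CRS before measuring, so .area is in square metres
cells['area_m2'] = cells.to_crs(epsg=6933).area
print(cells[['id', 'area_m2']])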
Example #5
def PlnResultsIntegrates(refname, px, py, epsg, wannaview=False, tfac=1):
    '''
    Integrates PLN results into a single file and exports a shapefile

    refname (string)
        Scenario suffix name to process
    '''
    # get list of files
    fnames = glob(refname + '*.PLN*')
    N = 100 / float(len(fnames))

    # loop over the file names
    for fname in fnames:
        # read the PLN file; if the final gdf already exists, accumulate into it
        if 'Fgdf' in locals():
            try:
                # converts file to dataframe
                gdf = ReadPLN(fname, px, py, epsg, tfac=tfac)
                gdf['prob001'] = (gdf['thickness'].values > 0.01) * 1
                gdf['prob01'] = (gdf['thickness'].values > 0.1) * 1
                gdf['prob1'] = (gdf['thickness'].values > 1) * 1
                gdf['prob10'] = (gdf['thickness'].values > 10) * 1
                # concatenates
                Fgdf = GeoDataFrame(pd.concat([Fgdf, gdf]))
                # gets co-occurrence counts per (x, y) cell
                prob = Fgdf.groupby(['x', 'y'], as_index=False).sum()
                prob = prob.sort_values(['x', 'y'])
                # gets maximum thickness
                thickness = Fgdf.groupby(['x', 'y'], as_index=False).max()
                thickness = thickness.sort_values(['x', 'y'])
                Fgdf = Fgdf.drop_duplicates(['x', 'y'])
                Fgdf = Fgdf.sort_values(['x', 'y'])
                Fgdf['thickness'] = thickness['thickness'].values
                Fgdf['prob001'] = prob['prob001'].values
                Fgdf['prob01'] = prob['prob01'].values
                Fgdf['prob1'] = prob['prob1'].values
                Fgdf['prob10'] = prob['prob10'].values
            except Exception:
                print('error in scenario {}'.format(fname))
        else:
            try:
                # creates final dataframe
                Fgdf = ReadPLN(fname, px, py, epsg, wannaview=wannaview)
                Fgdf['prob001'] = (Fgdf['thickness'].values > 0.01) * 1
                Fgdf['prob01'] = (Fgdf['thickness'].values > 0.1) * 1
                Fgdf['prob1'] = (Fgdf['thickness'].values > 1) * 1
                Fgdf['prob10'] = (Fgdf['thickness'].values > 10) * 1

            except Exception:
                print('error in scenario {}'.format(fname))
    Fgdf['prob001'] = Fgdf['prob001'].values * N
    Fgdf['prob01'] = Fgdf['prob01'].values * N
    Fgdf['prob1'] = Fgdf['prob1'].values * N
    Fgdf['prob10'] = Fgdf['prob10'].values * N
    return Fgdf
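
The core of the loop is an exceedance count: every scenario contributes 0/1 indicator columns per thickness threshold, the groupby sum accumulates them per (x, y) cell, and the final multiplication by N = 100 / len(fnames) turns the counts into percentages. A minimal sketch of that aggregation with invented thickness data (the column names mirror the function; the numbers are made up):

import pandas as pd

# two toy "scenarios" of thickness per (x, y) cell
scenarios = [
    pd.DataFrame({'x': [0, 1], 'y': [0, 0], 'thickness': [0.5, 12.0]}),
    pd.DataFrame({'x': [0, 1], 'y': [0, 0], 'thickness': [0.02, 0.3]}),
]
for df in scenarios:
    for name, threshold in [('prob001', 0.01), ('prob01', 0.1),
                            ('prob1', 1), ('prob10', 10)]:
        df[name] = (df['thickness'] > threshold) * 1  # 0/1 indicator

stacked = pd.concat(scenarios)
counts = stacked.groupby(['x', 'y'], as_index=False).sum()
N = 100 / len(scenarios)
counts[['prob001', 'prob01', 'prob1', 'prob10']] *= N  # counts -> percent of scenarios
print(counts)

The original additionally tracks the maximum thickness per cell through a separate groupby(...).max().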
Example #6
def load_crime_stats(population_group=None, crime_list=None, provence=None):
    # lowercase the province name to match the datasets
    if provence is not None:
        provence = provence.lower()
    # get data set dir
    data_path = get_work_path()
    # load and clean the police statistics
    police_stats = clean_police_stats(
        data_path.joinpath('Police_Statistics___2005_-_2017.csv'))
    if crime_list is not None:
        police_stats = police_stats[police_stats['Crime'].isin(crime_list)]
    if provence is not None:
        police_stats = police_stats.query(f"Province == '{provence}'")
    # population shape file
    pop_stats = clean_popluation_stats(
        data_path.joinpath(
            'population/geo_export_3ec3ac74-ddff-4220-8007-b9b5643f79af.shp'))
    base_group = ['sal_code_i', 'pr_name', 'sp_name', 'geometry']
    if population_group is not None:
        # filter out columns
        pop_stats = pop_stats[pop_groups[population_group] + base_group]
    if provence is not None:
        pop_stats = pop_stats.query(f"pr_name == '{provence}'")
    # shape id to weights
    precinct = clean_area_2_precint(
        data_path.joinpath('Precinct_to_small_area_weights.csv'))
    # munge data
    df = merge(precinct,
               pop_stats,
               left_on='small_area',
               right_on='sal_code_i')
    df = merge(df, police_stats, left_on='precinct', right_on='Police Station')
    # calculate crime per shape as a proportion of the precinct weight
    df['total_crime'] = df.weight * df.Incidents
    # keep as geo-dataframe
    df = GeoDataFrame(df, crs=pop_stats.crs)
    # clean data frame
    df = df.drop([
        'sal_code_i', 'pr_name', 'sp_name', 'Police Station', 'Incidents',
        'weight'
    ],
                 axis=1)
    # agg precinct back into shapes
    temp_df = df.groupby(['small_area', 'Year',
                          'Crime'])[['total_crime']].sum().round()
    df = df.drop_duplicates(subset=['small_area', 'Year', 'Crime']).drop(
        ['total_crime'], axis=1)
    df = merge(df, temp_df, on=['small_area', 'Year', 'Crime'])
    return df
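
The merge step disaggregates precinct-level incident counts down to small areas via the precinct-to-area weights, then re-aggregates per area, year and crime. A toy sketch of just the weighting logic (both frames and all numbers are invented):

import pandas as pd

weights = pd.DataFrame({'small_area': ['A', 'B'],
                        'precinct': ['P1', 'P1'],
                        'weight': [0.25, 0.75]})
incidents = pd.DataFrame({'Police Station': ['P1'],
                          'Year': [2016],
                          'Crime': ['burglary'],
                          'Incidents': [400]})

df = weights.merge(incidents, left_on='precinct', right_on='Police Station')
df['total_crime'] = df.weight * df.Incidents  # 100 for area A, 300 for area B
print(df[['small_area', 'Year', 'Crime', 'total_crime']])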
Example #7
def _merge_vector_feature(eopatches, feature):
    """Merges GeoDataFrames of a vector feature."""
    dataframes = _extract_feature_values(eopatches, feature)

    if len(dataframes) == 1:
        return dataframes[0]

    crs_list = [dataframe.crs for dataframe in dataframes if dataframe.crs is not None]
    if not crs_list:
        crs_list = [None]
    if not _all_equal(crs_list):
        raise ValueError(f"Cannot merge feature {feature} because dataframes are defined for different CRS")

    merged_dataframe = GeoDataFrame(pd.concat(dataframes, ignore_index=True), crs=crs_list[0])
    merged_dataframe = merged_dataframe.drop_duplicates(ignore_index=True)
    # In future a support for vector operations could be added here

    return merged_dataframe
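
A minimal illustration of the concat-then-deduplicate behaviour with two toy GeoDataFrames sharing one row (data invented):

import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

a = gpd.GeoDataFrame({'name': ['x']}, geometry=[Point(0, 0)], crs='EPSG:4326')
b = gpd.GeoDataFrame({'name': ['x', 'y']}, geometry=[Point(0, 0), Point(1, 1)], crs='EPSG:4326')

merged = gpd.GeoDataFrame(pd.concat([a, b], ignore_index=True), crs=a.crs)
merged = merged.drop_duplicates(ignore_index=True)  # the shared ('x', POINT (0 0)) row survives once
print(len(merged))  # 2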
Example #8
def remove_truly_duplicated_geometries(data: geopandas.GeoDataFrame):
    return data.drop_duplicates("geometry")
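
The positional argument is drop_duplicates' subset, so rows are deduplicated on geometry alone even when other columns differ; for example (invented data):

import geopandas
from shapely.geometry import Point

frame = geopandas.GeoDataFrame({'tag': ['a', 'b']},
                               geometry=[Point(0, 0), Point(0, 0)])
print(len(remove_truly_duplicated_geometries(frame)))  # 1: second row dropped despite a differing 'tag'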
Example #9
inv = pd.read_excel(choice + '/islims_inventory.xlsx') # (see 0.0a iSlims and City Work Data)
wo = pd.read_excel(choice + '/islims_workorders.xlsx') # (see 0.0a iSlims and City Work Data)
NCR2 = pd.read_excel(choice + '/NCR.xlsx') # (from 0.0a)
DCR = pd.read_excel(choice + '/DCR.xlsx') # (from 0.0a)

wo = wo.rename(columns={'woID':'WoID'})
isf_wo = pd.merge(isf, wo, how='left', on = 'WoID') # isf is loaded earlier in the notebook (see 0.0a)
isf_wo = isf_wo.drop(['srchAssetID', 'gpscoordinateX', 'gpscoordinateY', 'initialproblemID', \
       'resolveddatetime', 'entereddate', 'finalresolutionID'], axis = 1)
isf_wo_inv = pd.merge(isf_wo, inv, how='left', on = 'inventoryID')
isf_wo_inv = isf_wo_inv.drop(['gpscoordinateX', 'gpscoordinateY'], axis = 1)

# Setting up data into geopandas
geometry = [Point(xy) for xy in zip(isf_wo_inv['gpsX'], isf_wo_inv['gpsY'])]
gLights2 = GeoDataFrame(isf_wo_inv, geometry=geometry)
gLights2 = gLights2.drop_duplicates(subset = ['WoID'])
geometry = [Point(xy) for xy in zip(NCR2['X'], NCR2['Y'])]
gNCR2 = GeoDataFrame(NCR2, geometry=geometry)

BUFFER = .000625 # 1/4th of a city block in radius of Maryland coordinates.
#BUFFER = .00125 # 1/2 of a city block in radius of Maryland coordinates.

gLights_Buff2 = gLights2.assign(geometry = lambda x: x.geometry.buffer(BUFFER))
# Overwrites the geometry column with a buffer centered at each light's point, i.e. applies the lambda to gLights2 and saves the result as geometry.

Matched_NLights = gpd.sjoin(gLights_Buff2, gNCR2, how='left')

Matched_NLights['Crime_LO_intime'] = [0]*len(Matched_NLights) # counter column, filled in later

Matched_NLights = Matched_NLights.dropna(subset = ['WoCompleted'])
Matched_NLights = Matched_NLights.dropna(subset = ['REPORT_DAT'])
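
One plausible next step, given the Crime_LO_intime counter, is tallying how many joined crime reports fall inside each light's buffer; a hypothetical sketch, not the notebook's actual code:

# hypothetical continuation: count joined crime reports per work order
counts = (Matched_NLights.groupby('WoID').size()
          .rename('crimes_in_buffer').reset_index())
gLights2 = gLights2.merge(counts, on='WoID', how='left')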