import geopandas as gpd


def building_density_per_block(bldgs: gpd.GeoDataFrame, blocks: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    '''
    Adds columns to the blocks dataframe containing the total building area,
    the building count, and the building density per block
    '''
    assert 'block_id' in bldgs.columns, "ERROR: bldgs dataframe does not have block_id"

    bldgs['bldg_area'] = bldgs.area
    bldgs['bldg_count'] = 1
    bldg_area_by_block = bldgs[['block_id', 'bldg_area', 'bldg_count']].groupby('block_id').sum()
    bldg_area_by_block.reset_index(inplace=True)

    # Drop stale columns from a previous run before merging
    for c in ['bldg_area', 'bldg_count']:
        if c in blocks.columns:
            blocks.drop(columns=[c], inplace=True)

    blocks = blocks.merge(bldg_area_by_block, how='left', on='block_id')
    blocks['block_area'] = blocks.area
    blocks['bldg_density'] = blocks['bldg_area'] / blocks['block_area']

    # Blocks without any buildings have no match in the merge; treat them as zero
    blocks.fillna(value=0.0, inplace=True)
    return blocks
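
# A minimal usage sketch for building_density_per_block (not part of the
# original source): the geometries, CRS, and block ids below are made-up
# illustration data. A projected CRS is assumed so that .area yields metric
# areas.
from shapely.geometry import box

_blocks = gpd.GeoDataFrame({'block_id': [1]},
                           geometry=[box(0, 0, 100, 100)], crs="EPSG:32610")
_bldgs = gpd.GeoDataFrame({'block_id': [1, 1]},
                          geometry=[box(10, 10, 30, 30), box(50, 50, 70, 70)],
                          crs="EPSG:32610")
_blocks = building_density_per_block(_bldgs, _blocks)
# bldg_area = 800.0, bldg_count = 2, bldg_density = 800 / 10000 = 0.08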
def _check_topology_field(self, input_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    # Ensure the topology field exists; derive it from the index if missing.
    if self._TOPO_FIELD not in input_gdf.columns:
        # pandas Index has no .apply(); use .map() to cast each label to int
        input_gdf[self._TOPO_FIELD] = input_gdf.index.map(int)
    input_gdf = input_gdf.fillna(self._DEFAULT_NAN_VALUE_TO_USE)
    return input_gdf
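
# Hedged usage sketch for _check_topology_field (not from the original
# source): the method expects a host class providing _TOPO_FIELD and
# _DEFAULT_NAN_VALUE_TO_USE; the class, field name, and fill value below are
# illustrative assumptions.
from shapely.geometry import Point

class _TopologyHost:
    _TOPO_FIELD = 'topology'            # assumed field name
    _DEFAULT_NAN_VALUE_TO_USE = -9999   # assumed NaN replacement
    _check_topology_field = _check_topology_field  # reuse the function above

_gdf = gpd.GeoDataFrame({'geometry': [Point(0, 0), Point(1, 1)]})
_gdf = _TopologyHost()._check_topology_field(_gdf)
# _gdf['topology'] is now [0, 1], derived from the index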
from geopandas import GeoDataFrame, GeoSeries


def sanitise_geodataframe(gdf):
    # Promote a GeoSeries to a GeoDataFrame so column operations work uniformly
    if isinstance(gdf, GeoSeries):
        gdf = GeoDataFrame(gdf)
    gdf = gdf.fillna('None')

    # Flatten container-valued object columns so the frame can be serialised
    object_columns = gdf.select_dtypes(['object']).columns
    for col in object_columns:
        if gdf[col].apply(lambda x: isinstance(x, (set, list))).any():
            # Join only the container values; leave other values (e.g. the
            # 'None' strings introduced above) untouched
            gdf[col] = gdf[col].apply(
                lambda x: ','.join(x) if isinstance(x, (set, list)) else x)
        elif gdf[col].apply(lambda x: isinstance(x, dict)).any():
            gdf[col] = gdf[col].apply(
                lambda x: str(x) if isinstance(x, dict) else x)
    return gdf
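
# Hedged usage sketch for sanitise_geodataframe (illustration data, not from
# the original source): container-valued object columns are flattened to
# strings, e.g. before writing the frame to a format that cannot store them.
from shapely.geometry import Point

_gdf = gpd.GeoDataFrame({
    'tags': [['a', 'b'], ['c']],
    'meta': [{'k': 1}, {'k': 2}],
    'geometry': [Point(0, 0), Point(1, 1)],
})
_clean = sanitise_geodataframe(_gdf)
print(_clean['tags'].tolist())  # ['a,b', 'c']
print(_clean['meta'].tolist())  # ["{'k': 1}", "{'k': 2}"]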
def orig_dest(fd: gpd.GeoDataFrame, taz: gpd.GeoDataFrame, taz_name="tazce",
        plc_name="placefp", cnt_name="countyfp") -> gpd.GeoDataFrame:
    r"""
    Add County and Place codes to origin-destination data

    This function adds County and Census Designated Place codes from the
    GeoDataFrame `taz` to the origin-destination or flow GeoDataFrame `fd`.
    The relevant column names are defined in `taz_name`, `plc_name`, and
    `cnt_name`, respectively. The column names in the output GeoDataFrame are
    "orig_taz", "dest_taz", "orig_plc", "dest_plc", "orig_cnt", and
    "dest_cnt".

    Parameters
    ----------
    fd : geopandas.GeoDataFrame
        GeoDataFrame containing origin-destination flow data.
    taz : geopandas.GeoDataFrame
        GeoDataFrame containing Traffic Analysis Zone (TAZ) codes, Census
        Designated Place codes, and County codes.
    taz_name : str, defaults to "tazce"
        Column name in `taz` GeoDataFrame that contains TAZ codes.
    plc_name : str, defaults to "placefp"
        Column name in `taz` GeoDataFrame that contains Census Designated
        Place codes.
    cnt_name : str, defaults to "countyfp"
        Column name in `taz` GeoDataFrame that contains County codes.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with origin and destination TAZ, County, and Place codes.
        The column names are "orig_taz", "dest_taz", "orig_plc", "dest_plc",
        "orig_cnt", and "dest_cnt".

    See Also
    --------
    ~stplanpy.acs.read_acs
    ~stplanpy.geo.in_place

    Examples
    --------
    The example data files: "`od_data.csv`_", "`tl_2011_06_taz10.zip`_", and
    "`tl_2020_06_place.zip`_", can be downloaded from github.

    .. code-block:: python

        from stplanpy import acs
        from stplanpy import geo
        from stplanpy import od

        # Read origin-destination flow data
        flow_data = acs.read_acs("od_data.csv")
        flow_data = flow_data.clean_acs()

        # San Francisco Bay Area counties
        counties = ["001", "013", "041", "055", "075", "081", "085", "095", "097"]

        # Place code East Palo Alto
        places = ["20956"]

        # Read place data
        place = geo.read_shp("tl_2020_06_place.zip")

        # Keep only East Palo Alto
        place = place[place["placefp"].isin(places)]

        # Read taz data
        taz = geo.read_shp("tl_2011_06_taz10.zip")

        # Rename columns for consistency
        taz.rename(columns = {"countyfp10":"countyfp", "tazce10":"tazce"}, inplace = True)

        # Filter on county codes
        taz = taz[taz["countyfp"].isin(counties)]

        # Compute which taz lie inside a place and which part of their area
        taz = taz.in_place(place)

        # Add county and place codes to data frame.
        flow_data = flow_data.orig_dest(taz)

    .. _od_data.csv: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/od_data.csv
    .. _tl_2011_06_taz10.zip: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/tl_2011_06_taz10.zip
    .. _tl_2020_06_place.zip: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/tl_2020_06_place.zip
    """
    # Drop rows that have no valid countyfp or placefp, i.e. are not within a
    # county or place
    cnt = taz.dropna(subset=[cnt_name])
    cnt = cnt.drop(columns="geometry")
    plc = taz.dropna(subset=[plc_name])
    plc = plc.drop(columns="geometry")

    # We do not know the distribution of origins or destinations within a TAZ.
    # Therefore, assign a TAZ to a place if more than 0.5 of its surface area
    # is within this place. The "area" column holds this fraction, as computed
    # by in_place().
    plc = plc.loc[plc["area"] > 0.5]

    # Merge on countyfp codes
    fd = fd.merge(cnt, how="left", left_on="orig_taz", right_on=taz_name)
    fd.rename(columns={cnt_name: "orig_cnt"}, inplace=True)
    fd = fd.drop(columns=[taz_name, plc_name, "area"])
    fd = fd.merge(cnt, how="left", left_on="dest_taz", right_on=taz_name)
    fd.rename(columns={cnt_name: "dest_cnt"}, inplace=True)
    fd = fd.drop(columns=[taz_name, plc_name, "area"])

    # Merge on placefp codes
    fd = fd.merge(plc, how="left", left_on="orig_taz", right_on=taz_name)
    fd.rename(columns={plc_name: "orig_plc"}, inplace=True)
    fd = fd.drop(columns=[taz_name, cnt_name, "area"])
    fd = fd.merge(plc, how="left", left_on="dest_taz", right_on=taz_name)
    fd.rename(columns={plc_name: "dest_plc"}, inplace=True)
    fd = fd.drop(columns=[taz_name, cnt_name, "area"])

    # Clean up data frame
    fd.fillna({"orig_plc": "", "dest_plc": ""}, inplace=True)

    return fd
import time

import numpy as np
from pandas import Series
from geopandas import GeoDataFrame


def _RANSAC_outlier_detection(self, inGDF):
    """Detect geometric outliers between the point clouds of source and
    estimated coordinates using the RANSAC algorithm."""
    src_coords = np.array(inGDF[['X_UTM', 'Y_UTM']])
    xyShift = np.array(inGDF[['X_SHIFT_M', 'Y_SHIFT_M']])
    est_coords = src_coords + xyShift

    for co, n in zip([src_coords, est_coords], ['src_coords', 'est_coords']):
        assert co.ndim == 2 and co.shape[1] == 2, \
            "'%s' must have shape [Nx2]. Got shape %s." % (n, co.shape)

    if not 0 < self.rs_max_outlier_percentage < 100:
        raise ValueError("'rs_max_outlier_percentage' must be between 0 and 100.")
    min_inlier_percentage = 100 - self.rs_max_outlier_percentage

    # robustly estimate an affine transform model with RANSAC;
    # eliminates not more than the given maximum outlier percentage of the tie points
    model_robust, inliers = None, None
    count_inliers = None
    th = 5           # start RANSAC threshold
    th_checked = {}  # thresholds that have already been tried, mapped to the calculated inlier percentage
    th_subtract = 2
    count_iter = 0
    time_start = time.time()
    ideal_count = min_inlier_percentage * src_coords.shape[0] / 100

    # optimize the RANSAC threshold so that it marks not much more or less
    # than the given outlier percentage
    while True:
        if th_checked:
            th_too_strict = count_inliers < ideal_count  # True if too few inliers remain

            # calculate a new threshold using the old increment (but ensure
            # th_new > 0 by adjusting the increment if needed)
            th_new = 0
            while th_new <= 0:
                th_new = th + th_subtract if th_too_strict else th - th_subtract
                if th_new <= 0:
                    th_subtract /= 2

            # check if the calculated new threshold has been used before;
            # if yes, decrease the increment and recalculate the new threshold
            th_already_checked = th_new in th_checked
            th_subtract = th_subtract if not th_already_checked else th_subtract / 2
            th = th_new if not th_already_checked else \
                (th + th_subtract if th_too_strict else th - th_subtract)

        # RANSAC call
        if src_coords.size and est_coords.size:
            # import here to avoid static TLS ImportError
            from skimage.measure import ransac
            from skimage.transform import AffineTransform

            model_robust, inliers = \
                ransac((src_coords, est_coords), AffineTransform,
                       min_samples=6,
                       residual_threshold=th,
                       max_trials=2000,
                       stop_sample_num=int((min_inlier_percentage - self.rs_tolerance)
                                           / 100 * src_coords.shape[0]),
                       stop_residuals_sum=int((self.rs_max_outlier_percentage - self.rs_tolerance)
                                              / 100 * src_coords.shape[0])
                       )
        else:
            inliers = np.array([])
            break

        count_inliers = np.count_nonzero(inliers)
        th_checked[th] = count_inliers / src_coords.shape[0] * 100

        if min_inlier_percentage - self.rs_tolerance < th_checked[th] < \
                min_inlier_percentage + self.rs_tolerance:
            break
        if count_iter > self.rs_max_iter or time.time() - time_start > self.rs_timeout:
            break  # keep the last values and break the while loop

        count_iter += 1

    outliers = ~inliers if inliers is not None and inliers.size else np.array([])

    if inGDF.empty or outliers is None or \
            (isinstance(outliers, list) and not outliers) or \
            (isinstance(outliers, np.ndarray) and not outliers.size):
        outseries = Series([False] * len(self.GDF))
    elif len(inGDF) < len(self.GDF):
        # map the outlier flags back onto the full point grid
        inGDF['outliers'] = outliers
        fullGDF = GeoDataFrame(self.GDF['POINT_ID'])
        fullGDF = fullGDF.merge(inGDF[['POINT_ID', 'outliers']],
                                on='POINT_ID', how="outer")
        fullGDF = fullGDF.fillna(False)  # NaNs are due to exclude_previous_outliers
        outseries = fullGDF['outliers']
    else:
        outseries = Series(outliers)

    assert len(outseries) == len(self.GDF), 'RANSAC output validation failed.'

    self.ransac_model_robust = model_robust

    return outseries
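
# Hedged standalone sketch of the core RANSAC step used above (synthetic
# data; the fixed residual_threshold stands in for the threshold search the
# method performs): fit an affine transform between source and shifted
# coordinates and flag tie points that do not follow it.
from skimage.measure import ransac
from skimage.transform import AffineTransform

_rng = np.random.default_rng(0)
_src = _rng.uniform(0, 1000, size=(100, 2))
_dst = _src + np.array([5.0, -3.0])               # inliers follow a pure shift
_dst[:10] += _rng.uniform(50, 100, size=(10, 2))  # contaminate 10% with outliers

_model, _inliers = ransac((_src, _dst), AffineTransform,
                          min_samples=6, residual_threshold=5, max_trials=2000)
print(np.count_nonzero(~_inliers))  # ~10 outliers detected
print(_model.translation)           # close to [5., -3.]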