import geopandas as gpd


def building_density_per_block(bldgs: gpd.GeoDataFrame,
                               blocks: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    '''
    Add 'bldg_area', 'bldg_count', 'block_area', and 'bldg_density' columns
    to the blocks dataframe, where density is the total building area
    divided by the block area
    '''

    assert 'block_id' in bldgs.columns, "ERROR: bldgs dataframe does not have block_id"

    # Per-building footprint area (a projected CRS is assumed so that .area
    # is in meaningful units) and a unit count for aggregation; note that
    # this mutates the input bldgs frame
    bldgs['bldg_area'] = bldgs.area
    bldgs['bldg_count'] = 1
    bldg_area_by_block = bldgs[['block_id', 'bldg_area',
                                'bldg_count']].groupby('block_id').sum()
    bldg_area_by_block.reset_index(inplace=True)

    # Drop stale columns so the merge does not create _x/_y suffixes
    for c in ['bldg_area', 'bldg_count']:
        if c in blocks.columns:
            blocks.drop(columns=[c], inplace=True)
    blocks = blocks.merge(bldg_area_by_block, how='left', on='block_id')
    blocks['block_area'] = blocks.area
    blocks['bldg_density'] = blocks['bldg_area'] / blocks['block_area']

    # Blocks without any buildings get 0.0 instead of NaN
    blocks.fillna(value=0.0, inplace=True)

    return blocks
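
A minimal usage sketch with invented toy data (block id, geometries, and the
CRS are made up; a projected CRS is assumed so that .area returns square
meters):

import geopandas as gpd
from shapely.geometry import box

blocks = gpd.GeoDataFrame({"block_id": [1]},
                          geometry=[box(0, 0, 100, 100)], crs="EPSG:3857")
bldgs = gpd.GeoDataFrame({"block_id": [1, 1]},
                         geometry=[box(10, 10, 20, 20), box(30, 30, 50, 50)],
                         crs="EPSG:3857")

blocks = building_density_per_block(bldgs, blocks)
print(blocks[["bldg_area", "bldg_count", "bldg_density"]])
# bldg_area = 100 + 400 = 500, bldg_density = 500 / 10000 = 0.05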
Example 2
    def _check_topology_field(self,
                              input_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
        # Backfill the topology field from the index if it is missing
        # (a pandas Index has no .apply(); use .map() instead)
        if self._TOPO_FIELD not in input_gdf.columns:
            input_gdf[self._TOPO_FIELD] = input_gdf.index.map(int)

        input_gdf = input_gdf.fillna(self._DEFAULT_NAN_VALUE_TO_USE)
        return input_gdf
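
The class this method belongs to is not shown; a minimal stand-in (class name
and attribute values invented) illustrates the behaviour:

import geopandas as gpd
from shapely.geometry import Point

class TopologyDemo:
    _TOPO_FIELD = "topo_uuid"            # hypothetical field name
    _DEFAULT_NAN_VALUE_TO_USE = -9999    # hypothetical NaN placeholder

    def _check_topology_field(self, input_gdf):
        if self._TOPO_FIELD not in input_gdf.columns:
            input_gdf[self._TOPO_FIELD] = input_gdf.index.map(int)
        return input_gdf.fillna(self._DEFAULT_NAN_VALUE_TO_USE)

gdf = gpd.GeoDataFrame({"name": ["a", None]},
                       geometry=[Point(0, 0), Point(1, 1)])
gdf = TopologyDemo()._check_topology_field(gdf)
print(gdf["topo_uuid"].tolist())  # [0, 1], backfilled from the index
print(gdf["name"].tolist())       # ['a', -9999], NaN replaced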
Example 3
from geopandas import GeoDataFrame, GeoSeries


def sanitise_geodataframe(gdf):
    # Flatten list-, set-, and dict-valued columns so that the frame can be
    # serialised (e.g. written to a shapefile)
    if isinstance(gdf, GeoSeries):
        gdf = GeoDataFrame(geometry=gdf)
    gdf = gdf.fillna('None')
    object_columns = gdf.select_dtypes(['object']).columns
    for col in object_columns:
        # convert only the container values themselves, so that mixed columns
        # (e.g. some lists, some plain strings) are not mangled
        if gdf[col].apply(lambda x: isinstance(x, (set, list))).any():
            gdf[col] = gdf[col].apply(
                lambda x: ','.join(map(str, x)) if isinstance(x, (set, list)) else x)
        elif gdf[col].apply(lambda x: isinstance(x, dict)).any():
            gdf[col] = gdf[col].apply(
                lambda x: str(x) if isinstance(x, dict) else x)
    return gdf
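
A toy example (data invented) showing how list- and dict-valued columns are
flattened:

import geopandas as gpd
from shapely.geometry import Point

gdf = gpd.GeoDataFrame({"tags": [["a", "b"], ["c"]],
                        "meta": [{"k": 1}, {"k": 2}]},
                       geometry=[Point(0, 0), Point(1, 1)])
clean = sanitise_geodataframe(gdf)
print(clean["tags"].tolist())  # ['a,b', 'c']
print(clean["meta"].tolist())  # ["{'k': 1}", "{'k': 2}"]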
Example 4
def orig_dest(fd: gpd.GeoDataFrame,
              taz: gpd.GeoDataFrame,
              taz_name="tazce",
              plc_name="placefp",
              cnt_name="countyfp") -> gpd.GeoDataFrame:
    r"""
    Add County and Place codes to origin-destination data           

    This function adds County and Census Designated Place codes from the
    GeoDataFrame `taz` to the origin-destination or flow GeoDataFrame `fd`. The
    relevant column names are defined in `taz_name`, `plc_name`, and `cnt_name`,
    respectively. The column names in the output GeoDataFrame are "orig_taz",
    "dest_taz", "orig_plc", "dest_plc", "orig_cnt", and "dest_cnt". 
    
    Parameters
    ----------
    fd : geopandas.GeoDataFrame
        Origin-destination or flow GeoDataFrame. Must contain "orig_taz" and
        "dest_taz" columns with origin and destination TAZ codes.
    taz : geopandas.GeoDataFrame
        GeoDataFrame containing Traffic Analysis Zone (TAZ) codes, Census
        Designated Place codes, and County codes.
    taz_name : str, defaults to "tazce"
        Column name in `taz` GeoDataFrame that contains TAZ codes. Defaults to
        "tazce".
    plc_name : str, defaults to "placefp"
        Column name in `taz` GeoDataFrame that contains Census Designated Place
        codes. Defaults to "placefp".
    cnt_name : str, defaults to "countyfp"
        Column name in `taz` GeoDataFrame that contains County codes. Defaults
        to "countyfp".
    
    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with origin and destination TAZ, County, and Place codes.
        The column names are "orig_taz", "dest_taz", "orig_plc", "dest_plc",
        "orig_cnt", and "dest_cnt". 
    
    See Also
    --------
    ~stplanpy.acs.read_acs
    ~stplanpy.geo.in_place
    
    Examples
    --------
    The example data files: "`od_data.csv`_", "`tl_2011_06_taz10.zip`_", and
    "`tl_2020_06_place.zip`_", can be downloaded from github.

    .. code-block:: python

        from stplanpy import acs
        from stplanpy import geo
        from stplanpy import od

        # Read origin-destination flow data
        flow_data = acs.read_acs("od_data.csv")
        flow_data = flow_data.clean_acs()

        # San Francisco Bay Area counties
        counties = ["001", "013", "041", "055", "075", "081", "085", "095", "097"]

        # Place code East Palo Alto
        places = ["20956"]

        # Read place data
        place = geo.read_shp("tl_2020_06_place.zip")

        # Keep only East Palo Alto
        place = place[place["placefp"].isin(places)]

        # Read taz data
        taz = geo.read_shp("tl_2011_06_taz10.zip")

        # Rename columns for consistency
        taz.rename(columns = {"countyfp10":"countyfp", "tazce10":"tazce"}, inplace = True)

        # Filter on county codes
        taz = taz[taz["countyfp"].isin(counties)]

        # Compute which taz lie inside a place and what part of their area does
        taz = taz.in_place(place)

        # Add county and place codes to data frame.
        flow_data = flow_data.orig_dest(taz)

    .. _od_data.csv: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/od_data.csv
    .. _tl_2011_06_taz10.zip: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/tl_2011_06_taz10.zip
    .. _tl_2020_06_place.zip: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/tl_2020_06_place.zip
    """
    # Drop rows that have no valid countyfp or placefp, i.e. rows that are not
    # within a county or place
    cnt = taz.dropna(subset=[cnt_name])
    cnt = cnt.drop(columns="geometry")
    plc = taz.dropna(subset=[plc_name])
    plc = plc.drop(columns="geometry")
    # We do not know the distribution of origins or destinations within a TAZ.
    # Therefore, assign a TAZ to a place if more than half (0.5) of its surface
    # area falls within that place.
    plc = plc.loc[plc["area"] > 0.5]

    # Merge on countyfp codes
    fd = fd.merge(cnt, how="left", left_on="orig_taz", right_on=taz_name)
    fd.rename(columns={cnt_name: "orig_cnt"}, inplace=True)
    fd = fd.drop(columns=[taz_name, plc_name, "area"])
    fd = fd.merge(cnt, how="left", left_on="dest_taz", right_on=taz_name)
    fd.rename(columns={cnt_name: "dest_cnt"}, inplace=True)
    fd = fd.drop(columns=[taz_name, plc_name, "area"])

    # Merge on placefp codes
    fd = fd.merge(plc, how="left", left_on="orig_taz", right_on=taz_name)
    fd.rename(columns={plc_name: "orig_plc"}, inplace=True)
    fd = fd.drop(columns=[taz_name, cnt_name, "area"])
    fd = fd.merge(plc, how="left", left_on="dest_taz", right_on=taz_name)
    fd.rename(columns={plc_name: "dest_plc"}, inplace=True)
    fd = fd.drop(columns=[taz_name, cnt_name, "area"])

    # Clean up data frame
    fd.fillna({"orig_plc": "", "dest_plc": ""}, inplace=True)

    return fd
Example 5
    def _RANSAC_outlier_detection(self, inGDF):
        """Detect geometric outliers between the point clouds of source and
        estimated coordinates using the RANSAC algorithm."""
        # NOTE: assumes module-level imports of time, numpy as np,
        # pandas.Series, and geopandas.GeoDataFrame
        # from skimage.transform import PolynomialTransform  # import here to avoid static TLS ImportError

        src_coords = np.array(inGDF[['X_UTM', 'Y_UTM']])
        xyShift = np.array(inGDF[['X_SHIFT_M', 'Y_SHIFT_M']])
        est_coords = src_coords + xyShift

        for co, n in zip([src_coords, est_coords], ['src_coords', 'est_coords']):
            assert co.ndim == 2 and co.shape[1] == 2, "'%s' must have shape [Nx2]. Got shape %s." % (n, co.shape)

        if not 0 < self.rs_max_outlier_percentage < 100:
            raise ValueError("rs_max_outlier_percentage must be between 0 and 100.")
        min_inlier_percentage = 100 - self.rs_max_outlier_percentage

        # class PolyTF_1(PolynomialTransform):  # pragma: no cover
        #     def estimate(*data):
        #         return PolynomialTransform.estimate(*data, order=1)

        # robustly estimate affine transform model with RANSAC
        # eliminates not more than the given maximum outlier percentage of the tie points

        model_robust, inliers = None, None
        count_inliers = None
        th = 5  # initial RANSAC residual threshold
        th_checked = {}  # thresholds already tried -> computed inlier percentage
        th_subtract = 2  # step size for raising/lowering the threshold
        count_iter = 0
        time_start = time.time()
        ideal_count = min_inlier_percentage * src_coords.shape[0] / 100

        # optimize the RANSAC threshold so that it flags roughly the given
        # outlier percentage, neither much more nor much less
        while True:
            if th_checked:
                th_too_strict = count_inliers < ideal_count  # True if too few inliers remain

                # calculate a new threshold using the old increment (but ensure
                # th_new > 0 by shrinking the increment if needed)
                th_new = 0
                while th_new <= 0:
                    th_new = th + th_subtract if th_too_strict else th - th_subtract
                    if th_new <= 0:
                        th_subtract /= 2

                # check if the newly calculated threshold has been tried before
                th_already_checked = th_new in th_checked

                # if yes, decrease the increment and recalculate the threshold
                th_subtract = th_subtract if not th_already_checked else th_subtract / 2
                th = th_new if not th_already_checked else \
                    (th + th_subtract if th_too_strict else th - th_subtract)

            # RANSAC call
            # model_robust, inliers = ransac((src, dst), PolynomialTransform, min_samples=3,
            if src_coords.size and est_coords.size:
                # import here to avoid static TLS ImportError
                from skimage.measure import ransac
                from skimage.transform import AffineTransform

                model_robust, inliers = \
                    ransac((src_coords, est_coords), AffineTransform,
                           min_samples=6,
                           residual_threshold=th,
                           max_trials=2000,
                           stop_sample_num=int((min_inlier_percentage - self.rs_tolerance) / 100 * src_coords.shape[0]),
                           stop_residuals_sum=int(
                               (self.rs_max_outlier_percentage - self.rs_tolerance) / 100 * src_coords.shape[0])
                           )
            else:
                inliers = np.array([])
                break

            count_inliers = np.count_nonzero(inliers)

            th_checked[th] = count_inliers / src_coords.shape[0] * 100
            # print(th,'\t', th_checked[th], )
            if min_inlier_percentage - self.rs_tolerance < th_checked[th] < min_inlier_percentage + self.rs_tolerance:
                # print('in tolerance')
                break
            if count_iter > self.rs_max_iter or time.time() - time_start > self.rs_timeout:
                break  # keep last values and break while loop

            count_iter += 1

        # invert the boolean inlier mask to get the outlier mask
        outliers = ~inliers if inliers is not None and inliers.size else np.array([])

        if inGDF.empty or outliers is None or (isinstance(outliers, list) and not outliers) or \
           (isinstance(outliers, np.ndarray) and not outliers.size):
            outseries = Series([False] * len(self.GDF))
        elif len(inGDF) < len(self.GDF):
            inGDF['outliers'] = outliers
            fullGDF = GeoDataFrame(self.GDF['POINT_ID'])
            fullGDF = fullGDF.merge(inGDF[['POINT_ID', 'outliers']], on='POINT_ID', how="outer")
            # fullGDF.outliers.copy()[~fullGDF.POINT_ID.isin(GDF.POINT_ID)] = False
            fullGDF = fullGDF.fillna(False)  # NaNs are due to exclude_previous_outliers
            outseries = fullGDF['outliers']
        else:
            outseries = Series(outliers)

        assert len(outseries) == len(self.GDF), 'RANSAC output validation failed.'

        self.ransac_model_robust = model_robust

        return outseries
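
For reference, a minimal, self-contained sketch (synthetic data; parameter
values invented) of the core skimage RANSAC call that the threshold-tuning
loop above wraps:

import numpy as np
from skimage.measure import ransac
from skimage.transform import AffineTransform

rng = np.random.default_rng(0)
src = rng.uniform(0, 100, (50, 2))
dst = src + np.array([5.0, -3.0])        # true transform: a pure shift
dst[:5] += rng.uniform(20, 40, (5, 2))   # corrupt 10% of the points

model, inliers = ransac((src, dst), AffineTransform,
                        min_samples=6, residual_threshold=5, max_trials=2000)
print(np.count_nonzero(inliers), "of", len(src), "points kept as inliers")
print("estimated shift:", model.translation)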