import geopandas as gpd


def building_density_per_block(bldgs: gpd.GeoDataFrame, blocks: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    '''
    Adds columns to the blocks dataframe containing the total building area,
    the building count, and the building density per block
    '''
    assert 'block_id' in bldgs.columns, "ERROR: bldgs dataframe does not have block_id"

    bldgs['bldg_area'] = bldgs.area
    bldgs['bldg_count'] = 1
    bldg_area_by_block = bldgs[['block_id', 'bldg_area', 'bldg_count']].groupby('block_id').sum()
    bldg_area_by_block.reset_index(inplace=True)

    # Drop stale columns from a previous run before merging
    for c in ['bldg_area', 'bldg_count']:
        if c in blocks.columns:
            blocks.drop(columns=[c], inplace=True)

    blocks = blocks.merge(bldg_area_by_block, how='left', on='block_id')
    blocks['block_area'] = blocks.area
    blocks['bldg_density'] = blocks['bldg_area'] / blocks['block_area']

    # Blocks without any buildings have no match in the merge; treat them as zero
    blocks.fillna(value=0.0, inplace=True)
    return blocks
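
# A minimal usage sketch for building_density_per_block (not part of the
# original source): the geometries, CRS, and block ids below are made-up
# illustration data. A projected CRS is assumed so that .area yields metric
# areas.
from shapely.geometry import box

_blocks = gpd.GeoDataFrame({'block_id': [1]},
                           geometry=[box(0, 0, 100, 100)], crs="EPSG:32610")
_bldgs = gpd.GeoDataFrame({'block_id': [1, 1]},
                          geometry=[box(10, 10, 30, 30), box(50, 50, 70, 70)],
                          crs="EPSG:32610")
_blocks = building_density_per_block(_bldgs, _blocks)
# bldg_area = 800.0, bldg_count = 2, bldg_density = 800 / 10000 = 0.08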
def _check_topology_field(self, input_gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    # Ensure the topology field exists; derive it from the index if missing.
    if self._TOPO_FIELD not in input_gdf.columns:
        # pandas Index has no .apply(); use .map() to cast each label to int
        input_gdf[self._TOPO_FIELD] = input_gdf.index.map(int)
    input_gdf = input_gdf.fillna(self._DEFAULT_NAN_VALUE_TO_USE)
    return input_gdf
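
# Hedged usage sketch for _check_topology_field (not from the original
# source): the method expects a host class providing _TOPO_FIELD and
# _DEFAULT_NAN_VALUE_TO_USE; the class, field name, and fill value below are
# illustrative assumptions.
from shapely.geometry import Point

class _TopologyHost:
    _TOPO_FIELD = 'topology'            # assumed field name
    _DEFAULT_NAN_VALUE_TO_USE = -9999   # assumed NaN replacement
    _check_topology_field = _check_topology_field  # reuse the function above

_gdf = gpd.GeoDataFrame({'geometry': [Point(0, 0), Point(1, 1)]})
_gdf = _TopologyHost()._check_topology_field(_gdf)
# _gdf['topology'] is now [0, 1], derived from the index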
from geopandas import GeoDataFrame, GeoSeries


def sanitise_geodataframe(gdf):
    # Promote a GeoSeries to a GeoDataFrame so column operations work uniformly
    if isinstance(gdf, GeoSeries):
        gdf = GeoDataFrame(gdf)
    gdf = gdf.fillna('None')

    # Flatten container-valued object columns so the frame can be serialised
    object_columns = gdf.select_dtypes(['object']).columns
    for col in object_columns:
        if gdf[col].apply(lambda x: isinstance(x, (set, list))).any():
            # Join only the container values; leave other values (e.g. the
            # 'None' strings introduced above) untouched
            gdf[col] = gdf[col].apply(
                lambda x: ','.join(x) if isinstance(x, (set, list)) else x)
        elif gdf[col].apply(lambda x: isinstance(x, dict)).any():
            gdf[col] = gdf[col].apply(
                lambda x: str(x) if isinstance(x, dict) else x)
    return gdf
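
# Hedged usage sketch for sanitise_geodataframe (illustration data, not from
# the original source): container-valued object columns are flattened to
# strings, e.g. before writing the frame to a format that cannot store them.
from shapely.geometry import Point

_gdf = gpd.GeoDataFrame({
    'tags': [['a', 'b'], ['c']],
    'meta': [{'k': 1}, {'k': 2}],
    'geometry': [Point(0, 0), Point(1, 1)],
})
_clean = sanitise_geodataframe(_gdf)
print(_clean['tags'].tolist())  # ['a,b', 'c']
print(_clean['meta'].tolist())  # ["{'k': 1}", "{'k': 2}"]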
def orig_dest(fd: gpd.GeoDataFrame, taz: gpd.GeoDataFrame, taz_name="tazce",
        plc_name="placefp", cnt_name="countyfp") -> gpd.GeoDataFrame:
    r"""
    Add County and Place codes to origin-destination data

    This function adds County and Census Designated Place codes from the
    GeoDataFrame `taz` to the origin-destination or flow GeoDataFrame `fd`.
    The relevant column names are defined in `taz_name`, `plc_name`, and
    `cnt_name`, respectively. The column names in the output GeoDataFrame are
    "orig_taz", "dest_taz", "orig_plc", "dest_plc", "orig_cnt", and
    "dest_cnt".

    Parameters
    ----------
    fd : geopandas.GeoDataFrame
        GeoDataFrame containing origin-destination flow data.
    taz : geopandas.GeoDataFrame
        GeoDataFrame containing Traffic Analysis Zone (TAZ) codes, Census
        Designated Place codes, and County codes.
    taz_name : str, defaults to "tazce"
        Column name in `taz` GeoDataFrame that contains TAZ codes.
    plc_name : str, defaults to "placefp"
        Column name in `taz` GeoDataFrame that contains Census Designated
        Place codes.
    cnt_name : str, defaults to "countyfp"
        Column name in `taz` GeoDataFrame that contains County codes.

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with origin and destination TAZ, County, and Place codes.
        The column names are "orig_taz", "dest_taz", "orig_plc", "dest_plc",
        "orig_cnt", and "dest_cnt".

    See Also
    --------
    ~stplanpy.acs.read_acs
    ~stplanpy.geo.in_place

    Examples
    --------
    The example data files: "`od_data.csv`_", "`tl_2011_06_taz10.zip`_", and
    "`tl_2020_06_place.zip`_", can be downloaded from github.

    .. code-block:: python

        from stplanpy import acs
        from stplanpy import geo
        from stplanpy import od

        # Read origin-destination flow data
        flow_data = acs.read_acs("od_data.csv")
        flow_data = flow_data.clean_acs()

        # San Francisco Bay Area counties
        counties = ["001", "013", "041", "055", "075", "081", "085", "095", "097"]

        # Place code East Palo Alto
        places = ["20956"]

        # Read place data
        place = geo.read_shp("tl_2020_06_place.zip")

        # Keep only East Palo Alto
        place = place[place["placefp"].isin(places)]

        # Read taz data
        taz = geo.read_shp("tl_2011_06_taz10.zip")

        # Rename columns for consistency
        taz.rename(columns = {"countyfp10":"countyfp", "tazce10":"tazce"}, inplace = True)

        # Filter on county codes
        taz = taz[taz["countyfp"].isin(counties)]

        # Compute which taz lie inside a place and which part of their area
        taz = taz.in_place(place)

        # Add county and place codes to data frame.
        flow_data = flow_data.orig_dest(taz)

    .. _od_data.csv: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/od_data.csv
    .. _tl_2011_06_taz10.zip: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/tl_2011_06_taz10.zip
    .. _tl_2020_06_place.zip: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/tl_2020_06_place.zip
    """
    # Drop rows that have no valid countyfp or placefp, i.e. are not within a
    # county or place
    cnt = taz.dropna(subset=[cnt_name])
    cnt = cnt.drop(columns="geometry")
    plc = taz.dropna(subset=[plc_name])
    plc = plc.drop(columns="geometry")

    # We do not know the distribution of origins or destinations within a TAZ.
    # Therefore, assign a TAZ to a place if more than 0.5 of its surface area
    # is within this place. The "area" column holds this fraction, as computed
    # by in_place().
    plc = plc.loc[plc["area"] > 0.5]

    # Merge on countyfp codes
    fd = fd.merge(cnt, how="left", left_on="orig_taz", right_on=taz_name)
    fd.rename(columns={cnt_name: "orig_cnt"}, inplace=True)
    fd = fd.drop(columns=[taz_name, plc_name, "area"])
    fd = fd.merge(cnt, how="left", left_on="dest_taz", right_on=taz_name)
    fd.rename(columns={cnt_name: "dest_cnt"}, inplace=True)
    fd = fd.drop(columns=[taz_name, plc_name, "area"])

    # Merge on placefp codes
    fd = fd.merge(plc, how="left", left_on="orig_taz", right_on=taz_name)
    fd.rename(columns={plc_name: "orig_plc"}, inplace=True)
    fd = fd.drop(columns=[taz_name, cnt_name, "area"])
    fd = fd.merge(plc, how="left", left_on="dest_taz", right_on=taz_name)
    fd.rename(columns={plc_name: "dest_plc"}, inplace=True)
    fd = fd.drop(columns=[taz_name, cnt_name, "area"])

    # Clean up data frame
    fd.fillna({"orig_plc": "", "dest_plc": ""}, inplace=True)

    return fd
import time

import numpy as np
from pandas import Series
from geopandas import GeoDataFrame


def _RANSAC_outlier_detection(self, inGDF):
    """Detect geometric outliers between the point clouds of source and
    estimated coordinates using the RANSAC algorithm."""
    src_coords = np.array(inGDF[['X_UTM', 'Y_UTM']])
    xyShift = np.array(inGDF[['X_SHIFT_M', 'Y_SHIFT_M']])
    est_coords = src_coords + xyShift

    for co, n in zip([src_coords, est_coords], ['src_coords', 'est_coords']):
        assert co.ndim == 2 and co.shape[1] == 2, \
            "'%s' must have shape [Nx2]. Got shape %s." % (n, co.shape)

    if not 0 < self.rs_max_outlier_percentage < 100:
        raise ValueError("'rs_max_outlier_percentage' must be between 0 and 100.")
    min_inlier_percentage = 100 - self.rs_max_outlier_percentage

    # robustly estimate an affine transform model with RANSAC;
    # eliminates not more than the given maximum outlier percentage of the tie points
    model_robust, inliers = None, None
    count_inliers = None
    th = 5           # start RANSAC threshold
    th_checked = {}  # thresholds that have already been tried, mapped to the calculated inlier percentage
    th_subtract = 2
    count_iter = 0
    time_start = time.time()
    ideal_count = min_inlier_percentage * src_coords.shape[0] / 100

    # optimize the RANSAC threshold so that it marks not much more or less
    # than the given outlier percentage
    while True:
        if th_checked:
            th_too_strict = count_inliers < ideal_count  # True if too few inliers remain

            # calculate a new threshold using the old increment (but ensure
            # th_new > 0 by adjusting the increment if needed)
            th_new = 0
            while th_new <= 0:
                th_new = th + th_subtract if th_too_strict else th - th_subtract
                if th_new <= 0:
                    th_subtract /= 2

            # check if the calculated new threshold has been used before;
            # if yes, decrease the increment and recalculate the new threshold
            th_already_checked = th_new in th_checked
            th_subtract = th_subtract if not th_already_checked else th_subtract / 2
            th = th_new if not th_already_checked else \
                (th + th_subtract if th_too_strict else th - th_subtract)

        # RANSAC call
        if src_coords.size and est_coords.size:
            # import here to avoid static TLS ImportError
            from skimage.measure import ransac
            from skimage.transform import AffineTransform

            model_robust, inliers = \
                ransac((src_coords, est_coords), AffineTransform,
                       min_samples=6,
                       residual_threshold=th,
                       max_trials=2000,
                       stop_sample_num=int((min_inlier_percentage - self.rs_tolerance)
                                           / 100 * src_coords.shape[0]),
                       stop_residuals_sum=int((self.rs_max_outlier_percentage - self.rs_tolerance)
                                              / 100 * src_coords.shape[0])
                       )
        else:
            inliers = np.array([])
            break

        count_inliers = np.count_nonzero(inliers)
        th_checked[th] = count_inliers / src_coords.shape[0] * 100

        if min_inlier_percentage - self.rs_tolerance < th_checked[th] < \
                min_inlier_percentage + self.rs_tolerance:
            break
        if count_iter > self.rs_max_iter or time.time() - time_start > self.rs_timeout:
            break  # keep the last values and break the while loop

        count_iter += 1

    outliers = ~inliers if inliers is not None and inliers.size else np.array([])

    if inGDF.empty or outliers is None or \
            (isinstance(outliers, list) and not outliers) or \
            (isinstance(outliers, np.ndarray) and not outliers.size):
        outseries = Series([False] * len(self.GDF))
    elif len(inGDF) < len(self.GDF):
        # map the outlier flags back onto the full point grid
        inGDF['outliers'] = outliers
        fullGDF = GeoDataFrame(self.GDF['POINT_ID'])
        fullGDF = fullGDF.merge(inGDF[['POINT_ID', 'outliers']],
                                on='POINT_ID', how="outer")
        fullGDF = fullGDF.fillna(False)  # NaNs are due to exclude_previous_outliers
        outseries = fullGDF['outliers']
    else:
        outseries = Series(outliers)

    assert len(outseries) == len(self.GDF), 'RANSAC output validation failed.'

    self.ransac_model_robust = model_robust

    return outseries
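
# Hedged standalone sketch of the core RANSAC step used above (synthetic
# data; the fixed residual_threshold stands in for the threshold search the
# method performs): fit an affine transform between source and shifted
# coordinates and flag tie points that do not follow it.
from skimage.measure import ransac
from skimage.transform import AffineTransform

_rng = np.random.default_rng(0)
_src = _rng.uniform(0, 1000, size=(100, 2))
_dst = _src + np.array([5.0, -3.0])               # inliers follow a pure shift
_dst[:10] += _rng.uniform(50, 100, size=(10, 2))  # contaminate 10% with outliers

_model, _inliers = ransac((_src, _dst), AffineTransform,
                          min_samples=6, residual_threshold=5, max_trials=2000)
print(np.count_nonzero(~_inliers))  # ~10 outliers detected
print(_model.translation)           # close to [5., -3.]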