# In[16]:

# Count the number of occurrences for each zip code, producing a two-column
# frame ready for merging: 'index' (the zip code) and 'count'.
# rename_axis/reset_index(name=...) make both column names explicit; relying on
# the old to_frame().reset_index() default breaks on pandas >= 2.0, where the
# reset column is named after the source column instead of 'index', which would
# silently empty the left_on='index' merge below.

counts = (
    data['Zip Code']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='count')
)

# In[22]:

# Merge the number of occurrences for each zip code with the corresponding
# zip code polygon.

counts = GeoDataFrame(
    counts.merge(zip_codes, how='left', left_on='index', right_on='zip_code'))

# Drop rows containing NaNs. After the left merge these are the zip codes with
# no matching polygon: every zip_codes-derived column (including geometry) is
# NaN for them, so they cannot be plotted.

counts = counts.dropna()

# Plot the choropleth of permit counts per zip-code polygon.
fig, ax = plt.subplots(figsize=(8, 8))

counts.plot(column='count', cmap='Blues', alpha=1, linewidth=0.1, ax=ax)

plt.title('Building Permits by Zipcode', size=20)
plt.axis('off')
plt.show()

# In[ ]:
# Code example #2
# 0
# File: source.py  Project: lukereed76/cartoframes
class Source:
    """Source

    Wraps a map data source: either a remote table/SQL query resolved through
    a ContextManager, or a local (Geo)DataFrame handled as GeoJSON.

    Args:
        source (str, pandas.DataFrame, geopandas.GeoDataFrame): a table name,
            SQL query, DataFrame, GeoDataFrame instance.
        credentials (:py:class:`Credentials <cartoframes.auth.Credentials>`, optional):
            A Credentials instance. If not provided, the credentials will be automatically
            obtained from the default credentials if available.
        geom_col (str, optional): string indicating the geometry column name in the source `DataFrame`.
        encode_data (bool, optional): Indicates whether the data needs to be encoded.
            Default is True.

    Example:

        Table name.

        >>> Source('table_name')

        SQL query.

        >>> Source('SELECT * FROM table_name')

        DataFrame object.

        >>> Source(df, geom_col='my_geom')

        GeoDataFrame object.

        >>> Source(gdf)

        Setting the credentials.

        >>> Source('table_name', credentials)

    """
    def __init__(self, source, credentials=None, geom_col=None, encode_data=True):
        # Defaults; overwritten below depending on the detected source type.
        self.credentials = None
        self.datetime_column_names = None
        self.encode_data = encode_data

        if isinstance(source, str):
            # Table name or SQL query: resolved remotely via the context manager.
            self.type = SourceType.QUERY
            self.manager = ContextManager(credentials)
            self.query = self.manager.compute_query(source)
            self.credentials = self.manager.credentials
        elif isinstance(source, DataFrame):
            if isinstance(source, GeoDataFrame):
                # NOTE(review): is_reprojection_needed/reproject are project
                # helpers; presumably they normalize the CRS before use — confirm.
                if is_reprojection_needed(source):
                    source = reproject(source)

            # DataFrame, GeoDataFrame: handled locally as GeoJSON.
            self.type = SourceType.GEOJSON
            # Copy so the in-place mutations below don't touch the caller's frame.
            self.gdf = GeoDataFrame(source, copy=True)
            # Converts datetime columns of self.gdf in place to RFC 2822 strings,
            # so it must run before data/bounds are computed from the frame.
            self.set_datetime_columns()

            # An explicit geom_col wins; otherwise fall back to the source's own
            # active geometry column; otherwise there is nothing to render.
            if geom_col in self.gdf:
                set_geometry(self.gdf, geom_col, inplace=True)
            elif has_geometry(source):
                self.gdf.set_geometry(source.geometry.name, inplace=True)
            else:
                raise ValueError('No valid geometry found. Please provide an input source with ' +
                                 'a valid geometry or specify the "geom_col" param with a geometry column.')

            # Remove nan geometries
            self.gdf.dropna(subset=[self.gdf.geometry.name], inplace=True)

            # Remove empty geometries
            self.gdf = self.gdf[~self.gdf.geometry.is_empty]

            # Checking the uniqueness of the geometry type.
            # geometry_types is the SET of geometry types present (None excluded);
            # the membership test compares that whole set against the allowed
            # type sets in VALID_GEOMETRY_TYPES (presumably a list of sets — the
            # error message's "type sets" wording supports this).
            geometry_types = set(self.gdf.geom_type.unique()).difference({None})
            if geometry_types not in VALID_GEOMETRY_TYPES:
                raise ValueError('No valid geometry column types ({}), it has '.format(geometry_types) +
                                 'to be one of the next type sets: {}.'.format(VALID_GEOMETRY_TYPES))

        else:
            raise ValueError('Wrong source input. Valid values are str and DataFrame.')

    def get_credentials(self):
        """Return the credentials dict for a QUERY source, or None.

        Implicitly returns None for a QUERY source without credentials and
        explicitly for a GEOJSON source (local data needs no credentials).
        """
        if self.type == SourceType.QUERY:
            if self.credentials:
                return {
                    # CARTO VL requires a username but CARTOframes allows passing only the base_url.
                    # That's why 'user' is used by default if username is empty.
                    'username': self.credentials.username or 'user',
                    'api_key': self.credentials.api_key,
                    'base_url': self.credentials.base_url
                }
        elif self.type == SourceType.GEOJSON:
            return None

    def set_datetime_columns(self):
        """Record datetime column names and serialize them (GEOJSON only).

        Mutates self.gdf in place: each datetime column is replaced by its
        RFC 2822 string representation.
        """
        if self.type == SourceType.GEOJSON:
            self.datetime_column_names = get_datetime_column_names(self.gdf)

            if self.datetime_column_names:
                for column in self.datetime_column_names:
                    self.gdf[column] = self.gdf[column].dt.strftime(RFC_2822_DATETIME_FORMAT)

    def get_datetime_column_names(self):
        # Names recorded by set_datetime_columns (None for QUERY sources).
        return self.datetime_column_names

    def get_geom_type(self):
        """Return the geometry type of the source ('point' fallback for queries)."""
        if self.type == SourceType.QUERY:
            return self.manager.get_geom_type(self.query) or 'point'
        elif self.type == SourceType.GEOJSON:
            return get_geodataframe_geom_type(self.gdf)

    def compute_metadata(self, columns=None):
        """Populate self.data and self.bounds, optionally restricting columns.

        For GEOJSON sources, a provided `columns` list is extended IN PLACE
        with the geometry column name (`+=` mutates the caller's list) and
        self.gdf is narrowed to those columns.
        """
        if self.type == SourceType.QUERY:
            self.data = self.query
            self.bounds = self.manager.get_bounds(self.query)
        elif self.type == SourceType.GEOJSON:
            if columns is not None:
                columns += [self.gdf.geometry.name]
                self.gdf = self.gdf[columns]
            self.data = get_geodataframe_data(self.gdf, self.encode_data)
            self.bounds = get_geodataframe_bounds(self.gdf)

    def is_local(self):
        # Only GEOJSON (DataFrame-backed) sources are local.
        return self.type == SourceType.GEOJSON

    def is_public(self):
        """Local data is always public; queries delegate to the manager."""
        if self.type == SourceType.QUERY:
            return self.manager.is_public(self.query)
        elif self.type == SourceType.GEOJSON:
            return True

    def schema(self):
        """Return the remote schema for QUERY sources; None for local data."""
        if self.type == SourceType.QUERY:
            return self.manager.get_schema()
        elif self.type == SourceType.GEOJSON:
            return None

    def get_table_names(self):
        """Return the table names behind a QUERY source; empty list for local data."""
        if self.type == SourceType.QUERY:
            return self.manager.get_table_names(self.query)
        elif self.type == SourceType.GEOJSON:
            return []
# Code example #3
# 0
# File: od.py  Project: pctBayArea/stplanpy
def orig_dest(fd: "gpd.GeoDataFrame",
              taz: "gpd.GeoDataFrame",
              taz_name="tazce",
              plc_name="placefp",
              cnt_name="countyfp") -> "gpd.GeoDataFrame":
    r"""
    Add County and Place codes to origin-destination data

    This function adds County and Census Designated Place codes from the
    GeoDataFrame `taz` to the origin-destination or flow GeoDataFrame `fd`. The
    relevant column names are defined in `taz_name`, `plc_name`, and `cnt_name`,
    respectively. The column names in the output GeoDataFrame are "orig_taz",
    "dest_taz", "orig_plc", "dest_plc", "orig_cnt", and "dest_cnt".

    Parameters
    ----------
    fd : geopandas.GeoDataFrame
        Origin-destination or flow GeoDataFrame. Must contain the columns
        "orig_taz" and "dest_taz" holding origin and destination TAZ codes.
    taz : geopandas.GeoDataFrame
        GeoDataFrame containing Traffic Analysis (TAZ) codes, Census Designated
        Place codes, and County codes.
    taz_name : str, defaults to "tazce"
        Column name in `taz` GeoDataFrame that contains TAZ codes. Defaults to
        "tazce".
    plc_name : str, defaults to "placefp"
        Column name in `taz` GeoDataFrame that contains Census Designated Place
        codes. Defaults to "placefp".
    cnt_name : str, defaults to "countyfp"
        Column name in `taz` GeoDataFrame that contains County codes. Defaults
        to "countyfp".

    Returns
    -------
    geopandas.GeoDataFrame
        GeoDataFrame with origin and destination TAZ, County, and Place codes.
        The column names are "orig_taz", "dest_taz", "orig_plc", "dest_plc",
        "orig_cnt", and "dest_cnt".

    See Also
    --------
    ~stplanpy.acs.read_acs
    ~stplanpy.geo.in_place

    Examples
    --------
    The example data files: "`od_data.csv`_", "`tl_2011_06_taz10.zip`_", and
    "`tl_2020_06_place.zip`_", can be downloaded from github.

    .. code-block:: python

        from stplanpy import acs
        from stplanpy import geo
        from stplanpy import od

        # Read origin-destination flow data
        flow_data = acs.read_acs("od_data.csv")
        flow_data = flow_data.clean_acs()

        # San Francisco Bay Area counties
        counties = ["001", "013", "041", "055", "075", "081", "085", "095", "097"]

        # Place code East Palo Alto
        places = ["20956"]

        # Read place data
        place = geo.read_shp("tl_2020_06_place.zip")

        # Keep only East Palo Alto
        place = place[place["placefp"].isin(places)]

        # Read taz data
        taz = geo.read_shp("tl_2011_06_taz10.zip")

        # Rename columns for consistency
        taz.rename(columns = {"countyfp10":"countyfp", "tazce10":"tazce"}, inplace = True)

        # Filter on county codes
        taz = taz[taz["countyfp"].isin(counties)]

        # Compute which taz lay inside a place and which part
        taz = taz.in_place(place)

        # Add county and place codes to data frame.
        flow_data = flow_data.orig_dest(taz)

    .. _od_data.csv: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/od_data.csv
    .. _tl_2011_06_taz10.zip: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/tl_2011_06_taz10.zip
    .. _tl_2020_06_place.zip: https://raw.githubusercontent.com/pctBayArea/stplanpy/main/examples/tl_2020_06_place.zip
    """
    # Drop rows that have no valid countyfp or placefp, i.e. TAZs that are not
    # within a county or place. The geometry column is not needed for the code
    # lookups and would otherwise be duplicated by the merges below.
    cnt = taz.dropna(subset=[cnt_name])
    cnt = cnt.drop(columns="geometry")
    plc = taz.dropna(subset=[plc_name])
    plc = plc.drop(columns="geometry")
    # We do not know the distribution of origins or destinations within a TAZ.
    # Therefore, assign a TAZ to a place only if more than 0.5 of its surface
    # area is within that place.
    plc = plc.loc[plc["area"] > 0.5]

    # Merge on countyfp codes (origin, then destination). After each merge the
    # lookup's key and unused columns are dropped so the next merge starts clean.
    fd = fd.merge(cnt, how="left", left_on="orig_taz", right_on=taz_name)
    fd.rename(columns={cnt_name: "orig_cnt"}, inplace=True)
    fd = fd.drop(columns=[taz_name, plc_name, "area"])
    fd = fd.merge(cnt, how="left", left_on="dest_taz", right_on=taz_name)
    fd.rename(columns={cnt_name: "dest_cnt"}, inplace=True)
    fd = fd.drop(columns=[taz_name, plc_name, "area"])

    # Merge on placefp codes (origin, then destination)
    fd = fd.merge(plc, how="left", left_on="orig_taz", right_on=taz_name)
    fd.rename(columns={plc_name: "orig_plc"}, inplace=True)
    fd = fd.drop(columns=[taz_name, cnt_name, "area"])
    fd = fd.merge(plc, how="left", left_on="dest_taz", right_on=taz_name)
    fd.rename(columns={plc_name: "dest_plc"}, inplace=True)
    fd = fd.drop(columns=[taz_name, cnt_name, "area"])

    # Clean up: TAZs outside any place get an empty string instead of NaN.
    # County columns are intentionally left as NaN when unmatched.
    fd.fillna({"orig_plc": "", "dest_plc": ""}, inplace=True)

    return fd
# Code example #4
# 0
# Inspect value distributions (only the last expression displays in a notebook cell).
concat_df.zip_code.value_counts()
concat_df['water_debt_only'].value_counts()

# Mapping NYC Zip Codes
# NOTE(review): hardcoded absolute local path — consider a configurable
# DATA_DIR so the notebook runs on other machines.
zip_codes = GeoDataFrame.from_file(
    'C:/Users/ghodg/Desktop/Project Data/Tax Lien/Zip Code Data/ZIP_CODE_040114/'
    'ZIP_CODE_040114.shp')
# Cast both merge keys to int so the zip-code join compares like with like.
zip_codes['zip_code'] = zip_codes['ZIPCODE'].astype(int)
concat_df['zip_code'] = concat_df['zip_code'].astype(int)

# Build the counts frame with explicit column names: 'index' (the zip code)
# and 'count_buildings'. Relying on the old to_frame().reset_index() default
# breaks on pandas >= 2.0, where the reset column is named after the source
# column instead of 'index', silently emptying the left_on='index' merge.
counts = (
    concat_df['zip_code']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='count_buildings')
)
counts = GeoDataFrame(
    counts.merge(zip_codes, how='left', left_on='index', right_on='zip_code'))
# Drop zip codes with no matching polygon (all merged columns NaN).
counts = counts.dropna()

# Plotting the map and colorbar: a ScalarMappable spanning the observed count
# range drives the colorbar so it matches the choropleth's color scale.
norm = colors.Normalize(vmin=counts.count_buildings.min(),
                        vmax=counts.count_buildings.max())
cbar = plt.cm.ScalarMappable(norm=norm, cmap='Blues')

fig, ax = plt.subplots(figsize=(10, 10))
counts.plot(column='count_buildings',
            cmap='Blues',
            legend=False,
            alpha=1,
            linewidth=0.5,
            edgecolor='black',
            ax=ax)
ax_cbar = fig.colorbar(cbar, ax=ax, fraction=0.046, pad=0.04)