Example #1
import pandas as pd
import numpy as np
import geopandas as gpd
import shared
import sys

args = sys.argv[1:]

households = pd.read_csv("data/households.csv", index_col="HHID")
gq_households = households[households.GQFlag == 1]
households = households[households.GQFlag == 0]

buildings = gpd.read_geocsv(args[0], index_col="building_id")
maz_controls = pd.read_csv("data/maz_controls.csv")

households["maz_id"] = \
    households.maz.map(maz_controls.set_index("MAZ").MAZ_ORIGINAL).values
buildings["residential_units"] = \
    buildings.residential_units.fillna(0).astype("int")

household_assignment = []
for maz_id in buildings.maz_id.dropna().unique():
    maz_households = households[households.maz_id == maz_id]
    building_options = buildings[buildings.maz_id == maz_id]
    building_id_options = np.repeat(building_options.index,
                                    building_options.residential_units)
    assert building_options.residential_units.sum() == building_id_options.size

    cnt = len(maz_households)
    if cnt == 0:
        continue
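
    # --- hedged sketch of the elided loop body (assumption, not the
    # original code): randomly assign each household in this maz to one
    # of the unit slots built above with np.repeat
    if building_id_options.size == 0:
        continue
    # sample without replacement while slots remain; double up otherwise
    replace = building_id_options.size < cnt
    chosen = np.random.choice(building_id_options, size=cnt,
                              replace=replace)
    household_assignment.append(pd.Series(chosen,
                                          index=maz_households.index))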
Example #2
import pandas as pd
import geopandas as gpd
import shared  # appears to provide gpd.read_geocsv (see other examples)
import sys
import time

args = sys.argv[1:]
prefix = args[0] + "_" if len(args) else ""

building_id_start_val = 1

# this file reads split_parcels_unioned.csv and
# buildings_linked_to_parcels.csv and moves attributes
# from parcels to buildings.  Since it also removes attributes
# from the parcel table, it writes out both moved_attribute_parcels.csv
# and moved_attribute_buildings.csv

parcels = gpd.read_geocsv("cache/%ssplit_parcels_unioned.csv" % prefix,
                          index_col="apn",
                          low_memory=False)
buildings_linked_to_parcels = gpd.read_geocsv(
    "cache/%sbuildings_linked_to_parcels.csv" % prefix,
    low_memory=False,
    index_col="building_id")

# this file contains a mapping of blocks to mazs to tazs, but we only
# want the maz-to-taz mapping
maz_to_taz = pd.read_csv("data/GeogXWalk2010_Blocks_MAZ_TAZ.csv").\
    drop_duplicates(subset=["MAZ_ORIGINAL"]).\
    set_index("MAZ_ORIGINAL").TAZ_ORIGINAL

parcels["taz_id"] = parcels.maz_id.map(maz_to_taz)

# the statement below is truncated in the original; coercing unparseable
# values to NaN is an assumption
buildings_linked_to_parcels['building:levels'] = pd.to_numeric(
    buildings_linked_to_parcels['building:levels'], errors='coerce')
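
# --- hedged sketch (assumption, not the original code): the header
# comment describes moving attributes from parcels onto buildings;
# the attribute list and output paths below are hypothetical
moved_attributes = ["land_value", "acres"]  # hypothetical list

buildings_out = buildings_linked_to_parcels.join(
    parcels[moved_attributes], on="apn")
parcels_out = parcels.drop(moved_attributes, axis=1)

buildings_out.to_csv("cache/%smoved_attribute_buildings.csv" % prefix)
parcels_out.to_csv("cache/%smoved_attribute_parcels.csv" % prefix)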
Example #3
import yaml
import sys
import numpy as np
import geopandas as gpd
import shared  # appears to provide gpd.read_geocsv (see other examples)

with open("cities_and_counties.yaml") as f:
    cities_and_counties = yaml.safe_load(f)

args = sys.argv[1:]
county = args[0]
lower_county = county.replace(" ", "_").lower()

cities_in_this_county = [
    c.replace("_", " ").title() for c in cities_and_counties[lower_county]
]

# it would be nice to fetch these over the web, but we can't seem to
# get a URL for an LFS file
url = "%s_parcels.zip" % lower_county
parcels = gpd.read_geocsv(url, low_memory=False)
juris = gpd.GeoDataFrame.from_file("data/juris.geojson")

# filter to jurisdictions in this county so as not to mis-assign
# egregiously - of course we might still mis-assign within the county
juris = juris[juris.NAME10.isin(cities_in_this_county)]

print "There are %d parcels" % len(parcels)

parcels["juris"] = np.nan
BATCHSIZE = 50000

parcels["polygon_geometry"] = parcels.geometry
parcels["geometry"] = parcels.centroid

for i in range(0, len(parcels), BATCHSIZE):
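    # --- hedged sketch of the elided loop body (assumption, not the
    # original code): spatially join this batch of parcel centroids to
    # the jurisdiction polygons and record the jurisdiction name
    batch = parcels.iloc[i:i + BATCHSIZE]
    joined = gpd.sjoin(batch, juris, how="left")
    # drop duplicate matches on boundary cases so lengths line up
    joined = joined[~joined.index.duplicated()]
    parcels.loc[batch.index, "juris"] = joined.NAME10.values
    print("Finished batch %d of %d" % (i, len(parcels)))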
Example #4
# this statement is truncated in the original (its left-hand side is
# elided); assigning back to the same column is an assumption, since
# .str.replace returns a new Series rather than modifying in place
buildings.loc[buildings.name == 'MAZ-level dummy building',
              'maz_building_id'] = \
    buildings.loc[buildings.name == 'MAZ-level dummy building',
                  'maz_building_id'].str.replace('BLDG', 'PCL')

buildings.reset_index(drop=True, inplace=True)
buildings.index += 1

buildings.drop('geometry', axis=1).to_csv(
     "cache/merged_buildings.csv", index_label="building_id")
buildings[['geometry']].to_csv(
     "cache/buildings_geometry.csv", index_label="building_id")
print "Finished writing buildings"

parcels = glob.glob("cache/*moved_attribute_parcels.csv")
juris_names = [p.replace("_moved_attribute_parcels.csv", "").
               replace("cache/", "") for p in parcels]
parcels = [gpd.read_geocsv(p) for p in parcels]
for i in range(len(parcels)):
    parcels[i]["juris_name"] = juris_names[i]
parcels = gpd.GeoDataFrame(pd.concat(parcels))

# FIXME this appends the whole juris name to the apn to make it unique
# instead this should be 4 character abbreviations
parcels["apn"] = parcels.juris_name.str.cat(
    parcels.apn.astype("str"), sep="-")

maz_pcls = xwalk.groupby('MAZ_ORIGINAL').TAZ_ORIGINAL.first()
mazpcl_dummies = buildings.loc[buildings.name == 'MAZ-level dummy building',
                               ['apn', 'maz_id']]
mazpcl_dummies['taz_id'] = mazpcl_dummies.maz_id.map(maz_pcls)
for col in parcels.columns[~parcels.columns.isin(mazpcl_dummies.columns)]:
    mazpcl_dummies[col] = np.nan
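
# --- hedged sketch (assumption, not the original code): append the
# MAZ-level dummy records to the merged parcels table so that every
# building sits on a parcel; the output path is hypothetical
parcels = gpd.GeoDataFrame(pd.concat([parcels, mazpcl_dummies],
                                     sort=False))
parcels.to_csv("cache/merged_parcels.csv", index=False)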
Example #5
import geopandas as gpd
import pandas as pd
import shared
import osmnx

# this script assigns an ESRI jobs dataset, which comes with lat-lng
# locations, to buildings.  We're not using it right now - instead we're
# using assign_jobs.py, which uses maz-level control totals to place
# jobs.  This is subject to change in the future, and is mostly to keep
# private data out of the UrbanSim micro data so that we can release all
# of our data as a public download.

print "Reading data"
buildings = gpd.read_geocsv("cache/buildings_match_controls.csv",
                            index_col="building_id")
parcels = gpd.read_geocsv("cache/moved_attribute_parcels.csv", index_col="apn")
establishments = gpd.read_geocsv("cache/establishments.csv",
                                 index_col="duns_number")
mazs = gpd.read_geocsv("mazs.csv", index_col="maz_id")

berkeley = osmnx.gdf_from_place("Berkeley, California")
berkeley_mazs = gpd.sjoin(mazs, berkeley).drop("index_right", axis=1)

print "Intersecting with buildings"
# goal here is to create a dictionary where keys are establishments ids and
# values are possible building_ids - this lets us write a function to assign
# jobs to buildings.  when we have a match to a parcel, we list the buildings
# on that parcel; when we have a match to a maz, we list the buildings in
# that maz.
establishments_intersect_buildings = gpd.sjoin(establishments, buildings)
establishments_possible_buildings = {
    # truncated in the original; a plausible completion (assumption):
    # map each establishment id to a list holding its matched building id
    k: [v] for k, v
    in establishments_intersect_buildings.index_right.items()
}
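
# --- hedged sketch (assumption, not the original code): the comment
# above mentions writing a function to assign jobs to buildings from
# this dictionary; the function name and random choice are hypothetical
import random

def assign_establishment_to_building(establishment_id):
    # pick one candidate building at random; None when nothing matched
    candidates = establishments_possible_buildings.get(establishment_id)
    if not candidates:
        return None
    return random.choice(candidates)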
Example #6
import geopandas as gpd
import pandas as pd
import numpy as np
import shared
import sys
from shapely.geometry import Polygon

args = sys.argv[1:]
prefix = args[0] + "_" if len(args) else ""

parcels = gpd.read_geocsv("parcels/%sparcels.csv" % prefix, low_memory=False)
mazs = gpd.read_geocsv("data/mazs.csv")

parcels_centroid = parcels.copy()
parcels_centroid["geometry"] = parcels.centroid
parcels_linked_to_mazs = gpd.sjoin(parcels_centroid, mazs)

parcels["maz_id"] = parcels_linked_to_mazs["maz_id"]


# takes a GeoDataFrame of parcels and returns a dictionary where keys are
# parcel ids and values are lists of parcel ids which are fully contained
# in the key parcel
def find_fully_contained_parcels(parcels):
    # next operation fails for invalid parcels, of which there are a few
    parcels = parcels[parcels.is_valid].copy()

    # this is because condos are often "cut out" of their parent parcel - we
    # want to drop the "cut out" part when doing the contains below
    # convex hull might not be precisely what we want here, but it
    # is close and I can't think of any major side effects
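    # --- hedged sketch of the elided function body (assumption, not the
    # original code): compare convex hulls so condo cut-outs don't defeat
    # the contains test; a spatial index would speed up the O(n^2) scan
    hulls = parcels.convex_hull

    fully_contained = {}
    for apn, hull in hulls.items():
        inside = hulls.index[hulls.within(hull)].drop(apn)
        if len(inside):
            fully_contained[apn] = list(inside)
    return fully_contained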
Example #7
import pandas as pd
import geopandas as gpd
import time
import sys
from shared import compute_area, compute_overlap_areas

# This script reads in the split_parcels.csv and the buildings.csv
# and joins buildings to parcels.  Each building is assigned an apn
# and is written to buildings_linked_to_parcels.csv

args = sys.argv[1:]
prefix = args[0] + "_" if len(args) else ""

print "Reading parcels and buildings", time.ctime()
buildings = gpd.read_geocsv("cache/%sbuildings.csv" % prefix,
                            low_memory=False,
                            index_col="building_id")
split_parcels = gpd.read_geocsv("cache/%ssplit_parcels.csv" % prefix,
                                index_col="apn",
                                low_memory=False)
mazs = gpd.read_geocsv("data/mazs.csv")[["maz_id", "geometry"]]


def assign_maz_id_by_centroid(df, mazs):
    df_centroid = df.copy()
    df_centroid["geometry"] = df.centroid

    df_linked_to_mazs = gpd.sjoin(df_centroid, mazs)

    df["maz_id"] = df_linked_to_mazs["maz_id"]
    return df
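
# --- hedged usage sketch (assumption, not shown in the truncated
# original): tag each table with the maz it falls in, using the helper
# defined above
buildings = assign_maz_id_by_centroid(buildings, mazs)
split_parcels = assign_maz_id_by_centroid(split_parcels, mazs)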
Example #8
import pandas as pd
import geopandas as gpd
import numpy as np
import time
from shared import compute_pct_area, compute_area
import sys

# This script joins parcels to mazs and splits them along maz boundaries.
# It reads parcels_no_self_intersections.csv and mazs.csv and writes
# split_parcels.csv

args = sys.argv[1:]
prefix = args[0] + "_" if len(args) else ""

print "Loading parcels and mazs"
print time.ctime()
parcels = gpd.read_geocsv("cache/%sparcels_no_self_intersections.csv" % prefix,
                          low_memory=False)
bad_apns = ["999 999999999"]
parcels = parcels[~parcels.apn.isin(bad_apns)]
parcels = parcels[parcels.apn.notnull()]
parcels["orig_apn"] = parcels["apn"]
mazs = gpd.read_geocsv("data/mazs.csv")[["maz_id", "geometry"]]

# join mazs to parcels
print "Joining parcels to mazs"
print time.ctime()
joined_parcels = gpd.sjoin(parcels, mazs, how="inner", op='intersects')


# when we intersect parcels with mazs, we want to merge parcel slivers back to
# the main shape - we don't need to keep small slivers of parcels that could be
# geometric errors
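
# --- hedged sketch (assumption, not the original code): clip each joined
# parcel to its maz, measure what share of the parcel's area each piece
# holds, and keep only pieces above a sliver threshold; the threshold
# value is hypothetical
SLIVER_PCT = 0.02

clipped = joined_parcels.copy()
clipped["geometry"] = [
    geom.intersection(mazs.geometry.loc[right])
    for geom, right in zip(joined_parcels.geometry,
                           joined_parcels.index_right)
]
pct_area = (clipped.geometry.area.values /
            joined_parcels.geometry.area.values)
split_parcels = clipped[pct_area >= SLIVER_PCT]
print("Kept %d of %d pieces" % (len(split_parcels), len(clipped)))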