import pandas as pd
import numpy as np
import geopandas as gpd
import shared
import sys

args = sys.argv[1:]

households = pd.read_csv("data/households.csv", index_col="HHID")
gq_households = households[households.GQFlag == 1]
households = households[households.GQFlag == 0]

buildings = gpd.read_geocsv(args[0], index_col="building_id")

maz_controls = pd.read_csv("data/maz_controls.csv")

households["maz_id"] = \
    households.maz.map(maz_controls.set_index("MAZ").MAZ_ORIGINAL).values

buildings["residential_units"] = \
    buildings.residential_units.fillna(0).astype("int")

household_assignment = []

for maz_id in buildings.maz_id.dropna().unique():

    maz_households = households[households.maz_id == maz_id]
    building_options = buildings[buildings.maz_id == maz_id]

    building_id_options = np.repeat(building_options.index,
                                    building_options.residential_units)

    assert building_options.residential_units.sum() == \
        building_id_options.size

    cnt = len(maz_households)
    if cnt == 0:
        continue
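    # Hedged sketch of the assignment step this loop is preparing (the
    # original continuation is cut off here): sample one unit slot per
    # household at random, sampling with replacement only when the MAZ has
    # more households than unit slots.  Illustration only, not the script's
    # actual continuation.
    if building_id_options.size == 0:
        continue
    chosen_building_ids = np.random.choice(
        building_id_options, size=cnt,
        replace=cnt > building_id_options.size)
    household_assignment.append(
        pd.Series(chosen_building_ids, index=maz_households.index))
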
import geopandas as gpd
import pandas as pd
import sys
import time

args = sys.argv[1:]
prefix = args[0] + "_" if len(args) else ""

building_id_start_val = 1

# this file reads the split_parcels.csv and the
# buildings_linked_to_parcels.csv and splits up the attributes
# from parcels to buildings.  Since it also removes attributes
# from the parcel tables it writes out both moved_attribute_parcels.csv
# and moved_attribute_buildings.csv

parcels = gpd.read_geocsv("cache/%ssplit_parcels_unioned.csv" % prefix,
                          index_col="apn", low_memory=False)

buildings_linked_to_parcels = gpd.read_geocsv(
    "cache/%sbuildings_linked_to_parcels.csv" % prefix,
    low_memory=False, index_col="building_id")

# this file contains mapping of blocks to mazs to tazs, but we want
# the maz to taz mapping
maz_to_taz = pd.read_csv("data/GeogXWalk2010_Blocks_MAZ_TAZ.csv").\
    drop_duplicates(subset=["MAZ_ORIGINAL"]).\
    set_index("MAZ_ORIGINAL").TAZ_ORIGINAL

parcels["taz_id"] = parcels.maz_id.map(maz_to_taz)

buildings_linked_to_parcels['building:levels'] = pd.to_numeric(
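    # hedged completion of the statement above, which is cut off in the
    # original: coerce the OSM "building:levels" tag to a number, with
    # unparseable values becoming NaN (the errors="coerce" choice is an
    # assumption, not taken from the original script)
    buildings_linked_to_parcels['building:levels'], errors="coerce")
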
import sys

import geopandas as gpd
import numpy as np
import yaml

cities_and_counties = yaml.load(open("cities_and_counties.yaml").read())

args = sys.argv[1:]
county = args[0]
lower_county = county.replace(" ", "_").lower()

cities_in_this_county = [
    c.replace("_", " ").title()
    for c in cities_and_counties[lower_county]
]

# would be nice to fetch these over the web, but can't seem to
# get a url for an lfs file
url = "%s_parcels.zip" % lower_county
parcels = gpd.read_geocsv(url, low_memory=False)

juris = gpd.GeoDataFrame.from_file("data/juris.geojson")
# filter to jurisdictions in this county so as not to mis-assign
# egregiously - of course we might still mis-assign within the county
juris = juris[juris.NAME10.isin(cities_in_this_county)]

print "There are %d parcels" % len(parcels)

parcels["juris"] = np.nan
BATCHSIZE = 50000
parcels["polygon_geometry"] = parcels.geometry
parcels["geometry"] = parcels.centroid

for i in range(0, len(parcels), BATCHSIZE):
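    # Hedged sketch of a possible loop body (the original is cut off here):
    # spatially join this batch of parcel centroids to the jurisdiction
    # polygons and record the jurisdiction name in the "juris" column.
    # Illustration only - the join predicate and de-duplication are
    # assumptions.
    batch = parcels.iloc[i:i + BATCHSIZE]
    joined = gpd.sjoin(batch, juris, how="inner", op="within")
    # a centroid on a shared boundary can match two jurisdictions; keep one
    joined = joined[~joined.index.duplicated()]
    parcels.loc[joined.index, "juris"] = joined.NAME10
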
buildings.loc[buildings.name == 'MAZ-level dummy building',
              'maz_building_id'].str.replace('BLDG', 'PCL')

buildings.reset_index(drop=True, inplace=True)
buildings.index += 1

buildings.drop('geometry', axis=1).to_csv(
    "cache/merged_buildings.csv", index_label="building_id")

buildings[['geometry']].to_csv(
    "cache/buildings_geometry.csv", index_label="building_id")

print "Finished writing buildings"

parcels = glob.glob("cache/*moved_attribute_parcels.csv")

juris_names = [p.replace("_moved_attribute_parcels.csv", "").
               replace("cache/", "") for p in parcels]

parcels = [gpd.read_geocsv(p) for p in parcels]

for i in range(len(parcels)):
    parcels[i]["juris_name"] = juris_names[i]

parcels = gpd.GeoDataFrame(pd.concat(parcels))

# FIXME this appends the whole juris name to the apn to make it unique
# instead this should be 4 character abbreviations
parcels["apn"] = parcels.juris_name.str.cat(
    parcels.apn.astype("str"), sep="-")

maz_pcls = xwalk.groupby('MAZ_ORIGINAL').TAZ_ORIGINAL.first()

mazpcl_dummies = buildings.loc[buildings.name == 'MAZ-level dummy building',
                               ['apn', 'maz_id']]
mazpcl_dummies['taz_id'] = mazpcl_dummies.maz_id.map(maz_pcls)

for col in parcels.columns[~parcels.columns.isin(mazpcl_dummies.columns)]:
    mazpcl_dummies[col] = np.nan
import geopandas as gpd
import pandas as pd
import shared
import osmnx

# this script is used to assign an ESRI jobs dataset which comes with lat-lng
# locations.  We're not using this right now - instead we're using
# assign_jobs.py, which uses maz-level control totals to place jobs.  This is
# subject to change in the future, and is mostly to keep private data out of
# the UrbanSim micro data in order to be able to release all of our data as a
# public download.

print "Reading data"

buildings = gpd.read_geocsv("cache/buildings_match_controls.csv",
                            index_col="building_id")
parcels = gpd.read_geocsv("cache/moved_attribute_parcels.csv",
                          index_col="apn")
establishments = gpd.read_geocsv("cache/establishments.csv",
                                 index_col="duns_number")
mazs = gpd.read_geocsv("mazs.csv", index_col="maz_id")

berkeley = osmnx.gdf_from_place("Berkeley, California")
berkeley_mazs = gpd.sjoin(mazs, berkeley).drop("index_right", axis=1)

print "Intersecting with buildings"

# goal here is to create a dictionary where keys are establishment ids and
# values are possible building_ids - this lets us write a function to assign
# jobs to buildings.  when we have a match to a parcel, we list the buildings
# on that parcel; when we have a match to a maz, we list the buildings in
# that maz.
establishments_intersect_buildings = gpd.sjoin(establishments, buildings)
establishments_possible_buildings = {
    k: [v]
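    # hedged completion of this comprehension (the original is cut off here):
    # map each establishment id to the building it intersected; the sjoin's
    # right-hand index column name, "index_right", is an assumption
    for k, v in establishments_intersect_buildings["index_right"].iteritems()
}
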
import geopandas as gpd
import pandas as pd
import numpy as np
import shared
import sys
from shapely.geometry import Polygon

args = sys.argv[1:]
prefix = args[0] + "_" if len(args) else ""

parcels = gpd.read_geocsv("parcels/%sparcels.csv" % prefix, low_memory=False)
mazs = gpd.read_geocsv("data/mazs.csv")

parcels_centroid = parcels.copy()
parcels_centroid["geometry"] = parcels.centroid
parcels_linked_to_mazs = gpd.sjoin(parcels_centroid, mazs)
parcels["maz_id"] = parcels_linked_to_mazs["maz_id"]


# takes a list of parcels and returns a dictionary where keys are parcel ids
# and values are lists of parcel ids which are fully contained in the key
# parcel id
def find_fully_contained_parcels(parcels):
    # next operation fails for invalid parcels, of which there are a few
    parcels = parcels[parcels.is_valid].copy()

    # this is because condos are often "cut out" of their parent parcel - we
    # want to drop the "cut out" part when doing the contains below
    # convex hull might not be precisely what we want here, but it
    # is close and I can't think of any major side effects
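    # Hedged sketch of how the rest of this helper might proceed (the
    # original body is cut off here; the "index_right" column name and the
    # exact steps are assumptions): replace geometries with their convex
    # hulls, spatially join the hulls against the original parcels with a
    # "contains" predicate, drop the trivial self-matches, and collect the
    # remaining matches into a dict of lists keyed by the containing parcel.
    hulls = parcels.copy()
    hulls["geometry"] = parcels.convex_hull

    contains = gpd.sjoin(hulls, parcels[["geometry"]], op="contains")
    contains = contains[contains.index.values != contains.index_right.values]

    return contains.groupby(level=0).index_right.apply(list).to_dict()
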
import pandas as pd
import geopandas as gpd
import time
import sys
from shared import compute_area, compute_overlap_areas

# This script reads in the split_parcels.csv and the buildings.csv
# and joins buildings to parcels.  Each building is assigned an apn
# and is written to buildings_linked_to_parcels.csv

args = sys.argv[1:]
prefix = args[0] + "_" if len(args) else ""

print "Reading parcels and buildings", time.ctime()

buildings = gpd.read_geocsv("cache/%sbuildings.csv" % prefix,
                            low_memory=False, index_col="building_id")
split_parcels = gpd.read_geocsv("cache/%ssplit_parcels.csv" % prefix,
                                index_col="apn", low_memory=False)
mazs = gpd.read_geocsv("data/mazs.csv")[["maz_id", "geometry"]]


def assign_maz_id_by_centroid(df, mazs):
    df_centroid = df.copy()
    df_centroid["geometry"] = df.centroid
    df_linked_to_mazs = gpd.sjoin(df_centroid, mazs)
    df["maz_id"] = df_linked_to_mazs["maz_id"]
    return df
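
# hypothetical usage of the helper above (the original call site is cut off),
# along the lines of:
#     buildings = assign_maz_id_by_centroid(buildings, mazs)
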
import pandas as pd
import geopandas as gpd
import numpy as np
import time
from shared import compute_pct_area, compute_area
import sys

# This script joins parcels to mazs and splits them along maz boundaries.
# It reads parcels.csv and mazs.csv and writes split_parcels.csv

args = sys.argv[1:]
prefix = args[0] + "_" if len(args) else ""

print "Loading parcels and mazs"
print time.ctime()

parcels = gpd.read_geocsv(
    "cache/%sparcels_no_self_intersections.csv" % prefix, low_memory=False)

bad_apns = ["999 999999999"]
parcels = parcels[~parcels.apn.isin(bad_apns)]
parcels = parcels[parcels.apn.notnull()]
parcels["orig_apn"] = parcels["apn"]

mazs = gpd.read_geocsv("data/mazs.csv")[["maz_id", "geometry"]]

# join mazs to parcels
print "Joining parcels to mazs"
print time.ctime()
joined_parcels = gpd.sjoin(parcels, mazs, how="inner", op='intersects')

# when we intersect parcels with mazs, we want to merge parcel slivers back to
# the main shape - we don't need to keep small slivers of parcels that could
# be geometric errors
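# A hedged illustration of the sliver test the comment above describes (not
# the original code; the 2% threshold and use of raw shapely areas are
# assumptions): measure the share of each parcel's area falling in each maz
# it touches - overlaps below a small fraction are the slivers that get
# folded back into the parcel's dominant piece rather than becoming separate
# split parcels.
maz_geoms = mazs.set_index("maz_id").geometry
overlap_pct = joined_parcels.apply(
    lambda row: row.geometry.intersection(maz_geoms[row.maz_id]).area /
    row.geometry.area, axis=1)
is_sliver = overlap_pct < 0.02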