def parcels_geography(parcels):
    """Load the parcel geography lookup table, indexed by parcel_id.

    Reads the parcels-geography CSV (keyed by geom_id), converts the
    index to parcel_id, attaches a human-readable jurisdiction name,
    and normalizes pda ids.

    Parameters
    ----------
    parcels : table with a geom_id column, used by geom_id_to_parcel_id

    Returns
    -------
    pandas.DataFrame indexed by parcel_id
    """
    df = pd.read_csv(
        os.path.join(misc.data_dir(), "02_01_2016_parcels_geography.csv"),
        index_col="geom_id")
    df = geom_id_to_parcel_id(df, parcels)

    # this will be used to map juris id to name
    juris_name = pd.read_csv(
        os.path.join(misc.data_dir(), "census_id_to_name.csv"),
        index_col="census_id").name10

    df["juris_name"] = df.jurisdiction_id.map(juris_name)

    # hand-patched names for a few parcels (presumably their jurisdiction
    # ids are missing from the census lookup — confirm against the data)
    df.loc[2054504, "juris_name"] = "Marin County"
    df.loc[2054505, "juris_name"] = "Santa Clara County"
    df.loc[2054506, "juris_name"] = "Marin County"
    df.loc[572927, "juris_name"] = "Contra Costa County"

    # every parcel must resolve to a jurisdiction name; fail loudly here
    # rather than producing NaNs downstream
    assert not df.juris_name.isnull().any(), "Empty juris_name values"

    df["pda_id"] = df.pda_id.str.lower()

    # danville wasn't supposed to be a pda
    df["pda_id"] = df.pda_id.replace("dan1", np.nan)

    return df
def get_dev_projects_table(scenario, parcels):
    """Load the development projects table, filtered to one scenario and
    restricted to projects whose geom_id exists in the parcels table.

    Parameters
    ----------
    scenario : column name holding 1/0 inclusion flags (filter only
        applied when the column exists in the table)
    parcels : table with a geom_id column

    Returns
    -------
    pandas.DataFrame indexed by parcel_id, with geom_id kept as a column
    """
    df = pd.read_csv(os.path.join(DATA_DIR, "development_projects.csv"))
    df = reprocess_dev_projects(df)

    # this filters project by scenario
    if scenario in df:
        # df[scenario] is 1s and 0s indicating whether to include it
        df = df[df[scenario].astype('bool')]

    df = df.dropna(subset=['geom_id'])
    df.geom_id = df.geom_id.astype(float)

    # warn about, then drop, projects whose geom_id has no parcel
    cnts = df.geom_id.isin(parcels.geom_id).value_counts()
    if False in cnts.index:
        # parenthesized print works under both python 2 and 3
        print("%d MISSING GEOMIDS!" % cnts.loc[False])
    df = df[df.geom_id.isin(parcels.geom_id)]

    geom_id = df.geom_id  # save for later
    df = df.set_index("geom_id")
    df = geom_id_to_parcel_id(df, parcels).reset_index()  # use parcel id
    df["geom_id"] = geom_id.values  # add it back again cause it goes away

    return df
def parcels_geography(parcels):
    """Parcel geography lookup, reindexed from geom_id to parcel_id.

    Adds a juris_name column (jurisdiction_id mapped through the census
    name lookup, with a few hand-patched records) and a lower-cased
    pda_id with the "dan1" pda removed.
    """
    geography_path = os.path.join(
        misc.data_dir(), "02_01_2016_parcels_geography.csv")
    df = geom_id_to_parcel_id(
        pd.read_csv(geography_path, index_col="geom_id"), parcels)

    # map jurisdiction ids to human-readable names
    names_path = os.path.join(misc.data_dir(), "census_id_to_name.csv")
    juris_name = pd.read_csv(names_path, index_col="census_id").name10
    df["juris_name"] = df.jurisdiction_id.map(juris_name)

    # hand-corrected jurisdiction names for specific parcels
    manual_names = {
        2054504: "Marin County",
        2054505: "Santa Clara County",
        2054506: "Marin County",
        572927: "Contra Costa County",
    }
    for parcel_id, name in manual_names.items():
        df.loc[parcel_id, "juris_name"] = name

    # assert no empty juris values
    assert True not in df.juris_name.isnull().value_counts()

    df["pda_id"] = df.pda_id.str.lower()
    # danville wasn't supposed to be a pda
    df["pda_id"] = df.pda_id.replace("dan1", np.nan)
    return df
def parcels_geography(parcels):
    """Load the 2017 parcel geography lookup table, indexed by parcel_id.

    Reads the parcels-geography CSV (keyed by geom_id), converts the
    index to parcel_id, attaches a human-readable jurisdiction name, and
    lower-cases the pda ids.

    Returns
    -------
    pandas.DataFrame indexed by parcel_id
    """
    df = pd.read_csv(
        os.path.join(DATA_DIR, "01_01_2017_parcels_geography.csv"),
        index_col="geom_id")
    df = geom_id_to_parcel_id(df, parcels)

    # this will be used to map juris id to name
    juris_name = pd.read_csv(
        os.path.join(DATA_DIR, "census_id_to_name.csv"),
        index_col="census_id").name10

    df["juris_name"] = df.jurisdiction_id.map(juris_name)

    # hand-patched name for a record the census lookup doesn't cover
    df.loc[1, "juris_name"] = "Edmonton"

    # every parcel must resolve to a jurisdiction name; fail loudly here
    # rather than producing NaNs downstream
    assert not df.juris_name.isnull().any(), "Empty juris_name values"

    # astype(str) first: pda_id may contain non-string values (e.g. NaN),
    # and .str.lower() would yield NaN for them otherwise
    df["pda_id"] = df.pda_id.astype(str).str.lower()

    return df
def zoning_baseline(parcels, zoning_lookup, settings):
    """Baseline zoning attributes per parcel.

    Joins each parcel's zoning_id to the zoning lookup table and
    reindexes from geom_id to parcel_id.  `settings` is accepted for
    interface consistency but not read here.
    """
    zoning_path = os.path.join(
        misc.data_dir(), "2015_12_21_zoning_parcels.csv")
    parcel_zoning = pd.read_csv(zoning_path, index_col="geom_id")
    merged = pd.merge(parcel_zoning, zoning_lookup.to_frame(),
                      left_on="zoning_id", right_index=True)
    return geom_id_to_parcel_id(merged, parcels)
def zoning_baseline(parcels, zoning_lookup, settings):
    """Return baseline zoning attributes per parcel, indexed by parcel_id.

    Reads the zoning-parcels CSV (geom_id -> zoning_id), merges in the
    zoning_lookup table on zoning_id, and reindexes from geom_id to
    parcel_id.  `settings` is accepted for interface consistency but not
    read here.
    """
    df = pd.read_csv(os.path.join(misc.data_dir(),
                                  "2015_12_21_zoning_parcels.csv"),
                     index_col="geom_id")
    # attach the zoning attributes for each parcel's zoning_id
    df = pd.merge(df, zoning_lookup.to_frame(),
                  left_on="zoning_id", right_index=True)
    df = geom_id_to_parcel_id(df, parcels)
    return df
def zoning_baseline(parcels, zoning_lookup, settings):
    """Baseline zoning per parcel, read from the 2017 zoning shapefile,
    joined to the zoning lookup and indexed by parcel_id.

    `settings` is accepted for interface consistency but not read here.
    """
    shapefile = os.path.join(DATA_DIR, "2017_01_01_zoning_parcels.shp")
    zoning = gp.GeoDataFrame.from_file(shapefile).set_index("geom_id")
    zoning = pd.merge(zoning, zoning_lookup.to_frame(),
                      left_on="zoning_id", right_index=True)
    return geom_id_to_parcel_id(zoning, parcels)
def development_projects(parcels, settings, scenario):
    """Load development projects ("add"/"build" actions only) for one
    scenario, cleaned up so they can be inserted as buildings.

    Fills hedonic-required fields with defaults, maps building types to
    ids via settings["building_type_map2"], drops records without a
    geom_id/year_built, and reindexes from geom_id to parcel_id
    (keeping geom_id as a column).
    """
    df = pd.read_csv(os.path.join(misc.data_dir(),
                                  "development_projects.csv"))
    df = reprocess_dev_projects(df)

    df = df[df.action.isin(["add", "build"])]

    # this filters project by scenario
    colname = "scen%s" % scenario
    # df[colname] is 1s and 0s indicating whether to include it
    # this used to be an optional filter but now I'm going to require it so
    # that we don't accidentally include all the development projects since
    # we've started using scenario-based dev projects pretty extensively
    df = df[df[colname].astype('bool')]

    df = df.dropna(subset=['geom_id'])

    # price/sqft values aren't known for future projects; zero them out
    for fld in ['residential_sqft', 'residential_price',
                'non_residential_price']:
        df[fld] = 0
    df["redfin_sale_year"] = 2012  # hedonic doesn't tolerate nans
    df["stories"] = df.stories.fillna(1)
    df["building_sqft"] = df.building_sqft.fillna(0)
    df["non_residential_sqft"] = df.non_residential_sqft.fillna(0)

    # collapse office-like building types into plain office before
    # mapping names to numeric type ids
    df["building_type"] = df.building_type.replace("HP", "OF")
    df["building_type"] = df.building_type.replace("GV", "OF")
    df["building_type"] = df.building_type.replace("SC", "OF")
    df["building_type_id"] = \
        df.building_type.map(settings["building_type_map2"])

    df = df.dropna(subset=["geom_id"])  # need a geom_id to link to parcel_id
    df = df.dropna(subset=["year_built"])  # need a year built to get built
    df["geom_id"] = df.geom_id.astype("int")
    df = df.query('residential_units != "rent"')
    df["residential_units"] = df.residential_units.fillna(0).astype("int")

    geom_id = df.geom_id  # save for later
    df = df.set_index("geom_id")
    df = geom_id_to_parcel_id(df, parcels).reset_index()  # use parcel id
    df["geom_id"] = geom_id.values  # add it back again cause it goes away

    # we don't predict prices for schools and hotels right now
    df = df.query("building_type_id <= 4 or building_type_id >= 7")

    df["deed_restricted_units"] = 0

    # parenthesized print works under both python 2 and 3
    print("Describe of development projects")
    print(df[orca.get_table('buildings').local_columns].describe())

    return df
def zoning_baseline(parcels, zoning_lookup, settings):
    """Baseline zoning per parcel with building-type columns renamed to
    their generic "typeN" form using settings["building_type_map2"]."""
    zoning_path = os.path.join(
        misc.data_dir(), "2015_12_21_zoning_parcels.csv")
    df = pd.read_csv(zoning_path, index_col="geom_id")
    df = pd.merge(df, zoning_lookup.to_frame(),
                  left_on="zoning_id", right_index=True)
    df = geom_id_to_parcel_id(df, parcels)

    # rename any column matching a building-type name to "type<N>";
    # all other columns pass through unchanged
    rename_map = {name: "type%d" % type_id
                  for name, type_id
                  in settings["building_type_map2"].items()}
    df.columns = [rename_map.get(col, col) for col in df.columns]
    return df
def zoning_baseline(parcels, zoning_lookup, settings):
    """Return baseline zoning per parcel, indexed by parcel_id.

    Merges the zoning-parcels CSV with zoning_lookup on zoning_id, then
    renames building-type columns to "typeN" using
    settings["building_type_map2"].
    """
    df = pd.read_csv(os.path.join(misc.data_dir(),
                                  "2015_12_21_zoning_parcels.csv"),
                     index_col="geom_id")
    df = pd.merge(df, zoning_lookup.to_frame(),
                  left_on="zoning_id", right_index=True)
    df = geom_id_to_parcel_id(df, parcels)
    # map each building-type name to "type<N>" and rename matching
    # columns; non-matching columns are left as-is
    d = {k: "type%d" % v for k, v in settings["building_type_map2"].items()}
    df.columns = [d.get(x, x) for x in df.columns]
    return df
def development_projects(parcels, settings, scenario):
    """Scenario-filtered development projects ("add"/"build" actions
    only), prepared for insertion as buildings and indexed by parcel_id
    with geom_id kept as a column."""
    projects = pd.read_csv(
        os.path.join(misc.data_dir(), "development_projects.csv"))
    projects = reprocess_dev_projects(projects)

    projects = projects[projects.action.isin(["add", "build"])]

    # Scenario filtering is mandatory: the scen<N> column holds 1s/0s
    # saying whether each project belongs to this scenario.  This used
    # to be an optional filter, but requiring it avoids accidentally
    # including every project now that scenario-based dev projects are
    # used extensively.
    scen_col = "scen%s" % scenario
    projects = projects[projects[scen_col].astype('bool')]

    projects = projects.dropna(subset=['geom_id'])

    # price/sqft fields aren't known for projects; zero them out
    for field in ['residential_sqft', 'residential_price',
                  'non_residential_price']:
        projects[field] = 0
    projects["redfin_sale_year"] = 2012  # hedonic doesn't tolerate nans
    projects["stories"] = projects.stories.fillna(1)
    projects["building_sqft"] = projects.building_sqft.fillna(0)
    projects["non_residential_sqft"] = \
        projects.non_residential_sqft.fillna(0)

    # fold the office-like types into plain office, then map names to ids
    projects["building_type"] = projects.building_type.replace("HP", "OF")
    projects["building_type"] = projects.building_type.replace("GV", "OF")
    projects["building_type"] = projects.building_type.replace("SC", "OF")
    projects["building_type_id"] = \
        projects.building_type.map(settings["building_type_map2"])

    # need a geom_id to link to parcel_id, and a year built to get built
    projects = projects.dropna(subset=["geom_id"])
    projects = projects.dropna(subset=["year_built"])
    projects["geom_id"] = projects.geom_id.astype("int")
    projects = projects.query('residential_units != "rent"')
    projects["residential_units"] = \
        projects.residential_units.fillna(0).astype("int")

    saved_geom_id = projects.geom_id
    projects = projects.set_index("geom_id")
    # switch to a parcel_id index, then restore geom_id as a column
    # since the conversion drops it
    projects = geom_id_to_parcel_id(projects, parcels).reset_index()
    projects["geom_id"] = saved_geom_id.values

    # we don't predict prices for schools and hotels right now
    projects = projects.query(
        "building_type_id <= 4 or building_type_id >= 7")

    projects["deed_restricted_units"] = 0

    print("Describe of development projects")
    print(projects[orca.get_table('buildings').local_columns].describe())

    return projects
def parcels_geography(parcels):
    """Parcel geography lookup reindexed from geom_id to parcel_id,
    with a juris_name column and lower-cased pda ids."""
    geography_path = os.path.join(
        misc.data_dir(), "02_01_2016_parcels_geography.csv")
    df = pd.read_csv(geography_path, index_col="geom_id",
                     dtype={'jurisdiction': 'str'})
    df = geom_id_to_parcel_id(df, parcels)

    # map jurisdiction ids to human-readable names
    names_path = os.path.join(misc.data_dir(), "census_id_to_name.csv")
    juris_name = pd.read_csv(names_path, index_col="census_id").name10
    df["juris_name"] = df.jurisdiction_id.map(juris_name)

    df["pda_id"] = df.pda_id.str.lower()
    return df
def parcels_geography(parcels):
    """Return the parcel geography lookup table, indexed by parcel_id.

    Reads the parcels-geography CSV keyed by geom_id (forcing the
    jurisdiction column to str), converts the index to parcel_id, maps
    jurisdiction_id to a juris_name via the census name lookup, and
    lower-cases pda_id.
    """
    df = pd.read_csv(os.path.join(misc.data_dir(),
                                  "02_01_2016_parcels_geography.csv"),
                     index_col="geom_id",
                     dtype={'jurisdiction': 'str'})
    df = geom_id_to_parcel_id(df, parcels)
    # jurisdiction id -> human-readable name
    juris_name = pd.read_csv(os.path.join(misc.data_dir(),
                                          "census_id_to_name.csv"),
                             index_col="census_id").name10
    df["juris_name"] = df.jurisdiction_id.map(juris_name)
    df["pda_id"] = df.pda_id.str.lower()
    return df
def demolish_events(parcels, settings, scenario):
    """Demolish/build records from the development projects table for
    one scenario, indexed by parcel_id.

    `settings` is accepted for interface consistency but not read here.
    """
    events = pd.read_csv(
        os.path.join(misc.data_dir(), "development_projects.csv"))
    events = reprocess_dev_projects(events)

    # the scenario column (when present) holds 1s/0s selecting projects
    if scenario in events:
        events = events[events[scenario].astype('bool')]

    # keep demolish and build records
    events = events[events.action.isin(["demolish", "build"])]

    events = events.dropna(subset=['geom_id']).set_index("geom_id")
    # switch the index from geom_id to parcel_id
    return geom_id_to_parcel_id(events, parcels).reset_index()
def demolish_events(parcels, settings, scenario):
    """Return demolish/build development-project records, indexed by
    parcel_id.

    Filters by the scenario column when one exists in the table, keeps
    only "demolish" and "build" actions, and converts geom_id to
    parcel_id.  `settings` is accepted for interface consistency but
    not read here.
    """
    df = pd.read_csv(os.path.join(misc.data_dir(),
                                  "development_projects.csv"))
    df = reprocess_dev_projects(df)
    # this filters project by scenario
    if scenario in df:
        # df[scenario] is 1s and 0s indicating whether to include it
        df = df[df[scenario].astype('bool')]
    # keep demolish and build records
    df = df[df.action.isin(["demolish", "build"])]
    df = df.dropna(subset=['geom_id'])
    df = df.set_index("geom_id")
    df = geom_id_to_parcel_id(df, parcels).reset_index()  # use parcel id
    return df
def get_dev_projects_table(scenario, parcels):
    """Development projects for one scenario whose geom_id is present
    in the parcels table, indexed by parcel_id with geom_id kept as a
    column."""
    projects = pd.read_csv(
        os.path.join(misc.data_dir(), "development_projects.csv"))
    projects = reprocess_dev_projects(projects)

    # the scenario column (when present) holds 1s/0s selecting projects
    if scenario in projects:
        projects = projects[projects[scenario].astype('bool')]

    projects = projects.dropna(subset=['geom_id'])

    # warn about, then drop, projects whose geom_id has no parcel
    matched = projects.geom_id.isin(parcels.geom_id)
    missing = (~matched).sum()
    if missing:
        print("%d MISSING GEOMIDS!" % missing)
    projects = projects[matched]

    saved_geom_id = projects.geom_id  # save for later
    projects = projects.set_index("geom_id")
    # switch to a parcel_id index, then restore geom_id as a column
    # since the conversion drops it
    projects = geom_id_to_parcel_id(projects, parcels).reset_index()
    projects["geom_id"] = saved_geom_id.values

    return projects
def static_parcels(settings, parcels):
    """Parcel ids of the parcels that should not be relocated.

    settings["static_parcels"] lists geom_ids; they are translated to
    parcel_ids via geom_id_to_parcel_id.
    """
    frozen_geom_ids = settings["static_parcels"]
    converted = geom_id_to_parcel_id(
        pd.DataFrame(index=frozen_geom_ids), parcels)
    return converted.index.values
def static_parcels(settings, parcels):
    """Return the parcel_ids for settings["static_parcels"].

    The setting lists geom_ids of parcels that should not be relocated;
    this converts them to parcel_ids via geom_id_to_parcel_id.
    """
    # list of geom_ids to not relocate
    static_parcels = settings["static_parcels"]
    # geom_ids -> parcel_ids
    return geom_id_to_parcel_id(
        pd.DataFrame(index=static_parcels), parcels).index.values