Esempio n. 1
0
def parcels_geography(parcels):
    """Build the parcel geography table with jurisdiction names attached.

    Reads ``02_01_2016_parcels_geography.csv`` (indexed by ``geom_id``) from
    the data directory, passes it through ``geom_id_to_parcel_id``, adds a
    ``juris_name`` column looked up from ``census_id_to_name.csv`` and
    lower-cases ``pda_id``.

    Parameters
    ----------
    parcels
        Forwarded unchanged to ``geom_id_to_parcel_id``.

    Returns
    -------
    pandas.DataFrame
        The geography table with ``juris_name`` and the cleaned ``pda_id``.
    """
    geography_path = os.path.join(
        misc.data_dir(), "02_01_2016_parcels_geography.csv")
    geography = pd.read_csv(geography_path, index_col="geom_id")
    geography = geom_id_to_parcel_id(geography, parcels)

    # lookup series used to map juris id to name
    names_path = os.path.join(misc.data_dir(), "census_id_to_name.csv")
    id_to_name = pd.read_csv(names_path, index_col="census_id").name10
    geography["juris_name"] = geography.jurisdiction_id.map(id_to_name)

    # hand-assigned names for a few specific index labels
    manual_names = (
        (2054504, "Marin County"),
        (2054505, "Santa Clara County"),
        (2054506, "Marin County"),
        (572927, "Contra Costa County"),
    )
    for label, county in manual_names:
        geography.loc[label, "juris_name"] = county

    # assert no empty juris values
    assert not geography.juris_name.isnull().any()

    # lower-case the ids; danville ("dan1") wasn't supposed to be a pda
    geography["pda_id"] = \
        geography.pda_id.str.lower().replace("dan1", np.nan)

    return geography
Esempio n. 2
0
def parcels_geography(parcels, scenario, settings, policy):
    """Build the parcel geography table for a scenario.

    Reads ``2021_02_25_parcels_geography.csv`` (indexed by ``geom_id``),
    passes it through ``geom_id_to_parcel_id``, attaches ``juris_name`` and
    ``juris_trich``, cleans ``pda_id_pba40`` and then, depending on which
    ``policy`` list contains ``scenario``, fills the ``pda_id_pba50``,
    ``gg_id``, ``tra_id``, ``ppa_id``, ``sesit_id`` (and for EIR ``coc_id``)
    columns plus their ``juris_*`` combinations.

    Parameters
    ----------
    parcels
        Forwarded unchanged to ``geom_id_to_parcel_id``.
    scenario
        Value tested for membership in the ``policy`` lists.
    settings
        Not used by this function.
    policy
        Mapping holding ``geographies_db_enable``, ``geographies_fb_enable``
        and ``geographies_eir_enable``.

    Returns
    -------
    pandas.DataFrame
    """
    csv_path = os.path.join(misc.data_dir(),
                            "2021_02_25_parcels_geography.csv")
    print('Version of parcels_geography: {}'.format(csv_path))
    geo = geom_id_to_parcel_id(
        pd.read_csv(csv_path, index_col="geom_id"), parcels)

    # lookup series used to map juris id to name
    names_path = os.path.join(misc.data_dir(), "census_id_to_name.csv")
    id_to_name = pd.read_csv(names_path, index_col="census_id").name10
    geo["juris_name"] = geo.jurisdiction_id.map(id_to_name)

    # hand-assigned names for a few specific index labels
    for label, county in ((2054504, "Marin County"),
                          (2054505, "Santa Clara County"),
                          (2054506, "Marin County"),
                          (572927, "Contra Costa County")):
        geo.loc[label, "juris_name"] = county
    # assert no empty juris values
    assert not geo.juris_name.isnull().any()

    geo['juris_trich'] = geo.juris + '-' + geo.trich_id

    # danville ("dan1") wasn't supposed to be a pda
    geo["pda_id_pba40"] = \
        geo.pda_id_pba40.str.lower().replace("dan1", np.nan)

    # (target column, source column) pairs, in assignment order
    if scenario in policy['geographies_db_enable']:
        # Draft Blueprint geographies: PDA, TRA, PPA, sesit
        column_sources = [("pda_id_pba50", "pda_id_pba50"),
                          ("gg_id", "gg_id"),
                          ("tra_id", "tra_id"),
                          ("ppa_id", "ppa_id"),
                          ("sesit_id", "sesit_id")]
    elif scenario in policy['geographies_fb_enable']:
        # Final Blueprint geographies: PDA, TRA, PPA, sesit
        column_sources = [("pda_id_pba50", "pda_id_pba50_fb"),
                          ("gg_id", "fbp_gg_id"),
                          ("tra_id", "fbp_tra_id"),
                          ("ppa_id", "fbp_ppa_id"),
                          ("sesit_id", "fbp_sesit_id")]
    elif scenario in policy['geographies_eir_enable']:
        # EIR geographies: TRA, PPA, sesit, CoC
        column_sources = [("pda_id_pba50", "pda_id_pba50_fb"),
                          ("gg_id", "eir_gg_id"),
                          ("tra_id", "eir_tra_id"),
                          ("ppa_id", "eir_ppa_id"),
                          ("sesit_id", "eir_sesit_id"),
                          ("coc_id", "eir_coc_id")]
    else:
        column_sources = []

    # geographies that also get a "<juris>-<id>" combined column
    juris_columns = {"tra_id": "juris_tra",
                     "ppa_id": "juris_ppa",
                     "sesit_id": "juris_sesit",
                     "coc_id": "juris_coc"}
    for target, source in column_sources:
        geo[target] = geo[source].str.lower()
        if target in juris_columns:
            geo[juris_columns[target]] = geo.juris + '-' + geo[target]

    return geo
Esempio n. 3
0
def maz():
    """Load the MAZ geography table with a ``taz1454`` column attached.

    Reads ``maz_geography.csv``, drops duplicate ``MAZ`` rows and indexes by
    ``MAZ``; then adds the ``TAZ1454`` column of ``maz22_taz1454.csv``
    (indexed by ``maz``), aligned on the index.

    Returns
    -------
    pandas.DataFrame
    """
    geography = pd.read_csv(
        os.path.join(misc.data_dir(), "maz_geography.csv"))
    geography = geography.drop_duplicates('MAZ').set_index('MAZ')

    crosswalk = pd.read_csv(
        os.path.join(misc.data_dir(), "maz22_taz1454.csv"),
        index_col='maz')
    geography['taz1454'] = crosswalk.TAZ1454
    return geography
Esempio n. 4
0
def maz():
    """Return the MAZ geography table including its ``taz1454`` column.

    ``maz_geography.csv`` is de-duplicated on ``MAZ`` and indexed by it; the
    ``TAZ1454`` column from ``maz22_taz1454.csv`` (indexed by ``maz``) is
    then assigned by index alignment.

    Returns
    -------
    pandas.DataFrame
    """
    data_dir = misc.data_dir()
    maz_table = pd.read_csv(os.path.join(data_dir, "maz_geography.csv"))
    maz_table = maz_table.drop_duplicates('MAZ')
    maz_table = maz_table.set_index('MAZ')

    maz_to_taz = pd.read_csv(os.path.join(data_dir, "maz22_taz1454.csv"),
                             index_col='maz')
    maz_table['taz1454'] = maz_to_taz.TAZ1454
    return maz_table
Esempio n. 5
0
def parcels_geography(parcels):
    """Load parcel geography and attach a ``juris_name`` column.

    Reads ``02_01_2016_parcels_geography.csv`` (indexed by ``geom_id``),
    passes it through ``geom_id_to_parcel_id``, maps ``jurisdiction_id`` to a
    name via ``census_id_to_name.csv`` and lower-cases ``pda_id``.

    Parameters
    ----------
    parcels
        Forwarded unchanged to ``geom_id_to_parcel_id``.

    Returns
    -------
    pandas.DataFrame
    """
    data_dir = misc.data_dir()
    geography = geom_id_to_parcel_id(
        pd.read_csv(
            os.path.join(data_dir, "02_01_2016_parcels_geography.csv"),
            index_col="geom_id"),
        parcels)

    # lookup series used to map juris id to name
    id_to_name = pd.read_csv(
        os.path.join(data_dir, "census_id_to_name.csv"),
        index_col="census_id").name10
    geography["juris_name"] = geography.jurisdiction_id.map(id_to_name)

    # hand-assigned names for a few specific index labels
    manual_names = (
        (2054504, "Marin County"),
        (2054505, "Santa Clara County"),
        (2054506, "Marin County"),
        (572927, "Contra Costa County"),
    )
    for label, county in manual_names:
        geography.loc[label, "juris_name"] = county

    # assert no empty juris values
    assert not geography.juris_name.isnull().any()

    # lower-case the ids; danville ("dan1") wasn't supposed to be a pda
    lowered = geography.pda_id.str.lower()
    geography["pda_id"] = lowered.replace("dan1", np.nan)

    return geography
Esempio n. 6
0
def taz_geography():
    """Load the TAZ geography table and add subregion columns.

    ``taz_geography.csv`` is indexed by ``zone``.  Each row's
    ``superdistrict`` is looked up in ``superdistricts.csv`` (indexed by
    ``number``) to get ``subregion_id``, which is then mapped to a
    ``subregion`` name.

    Returns
    -------
    pandas.DataFrame
    """
    subregion_names = {1: "Core", 2: "Urban", 3: "Suburban", 4: "Rural"}

    zones = pd.read_csv(
        os.path.join(misc.data_dir(), "taz_geography.csv"),
        index_col="zone")
    districts = pd.read_csv(
        os.path.join(misc.data_dir(), "superdistricts.csv"),
        index_col="number")

    # positional assignment (.values), so the lookup result is not realigned
    subregion_ids = districts.subregion.loc[zones.superdistrict].values
    zones["subregion_id"] = subregion_ids
    zones["subregion"] = zones.subregion_id.map(subregion_names)
    return zones
Esempio n. 7
0
def taz_geography():
    """Return TAZ geography with ``subregion_id`` and ``subregion`` added.

    Reads ``taz_geography.csv`` (indexed by ``zone``) and
    ``superdistricts.csv`` (indexed by ``number``); the ``subregion`` value
    of each zone's superdistrict becomes ``subregion_id`` and is mapped to a
    name.

    Returns
    -------
    pandas.DataFrame
    """
    data_dir = misc.data_dir()
    taz_table = pd.read_csv(os.path.join(data_dir, "taz_geography.csv"),
                            index_col="zone")
    sd_table = pd.read_csv(os.path.join(data_dir, "superdistricts.csv"),
                           index_col="number")

    # look each zone's superdistrict up in the superdistricts table
    looked_up = sd_table.subregion.loc[taz_table.superdistrict]
    taz_table["subregion_id"] = looked_up.values

    names_by_id = {
        1: "Core",
        2: "Urban",
        3: "Suburban",
        4: "Rural",
    }
    taz_table["subregion"] = taz_table.subregion_id.map(names_by_id)
    return taz_table
Esempio n. 8
0
def superdistricts(scenario):
    """Load the superdistricts table, preferring a scenario-specific file.

    If ``superdistricts_s<scenario>.csv`` exists in the data directory it is
    read and the ``sqft_per_job_settings`` injectable is set to
    ``"for this scenario"``; otherwise ``superdistricts.csv`` is read and
    the injectable is set to ``"default"``.

    Parameters
    ----------
    scenario
        Value formatted into the scenario file name.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``number``.
    """
    scenario_path = os.path.join(
        misc.data_dir(), "superdistricts_s{}.csv".format(scenario))

    if os.path.isfile(scenario_path):
        # scenarios could contain policies (eg telework) and/or other
        # modifications
        csv_path = scenario_path
        settings_label = "for this scenario"
    else:
        # the default includes a telework assumption and SD adjustments
        csv_path = os.path.join(misc.data_dir(), "superdistricts.csv")
        settings_label = "default"

    table = pd.read_csv(csv_path, index_col="number")
    orca.add_injectable("sqft_per_job_settings", settings_label)
    return table
Esempio n. 9
0
def parcels_geography(parcels):
    """Load parcel geography with jurisdiction names and lower-case PDA ids.

    Reads ``02_01_2016_parcels_geography.csv`` (indexed by ``geom_id``, with
    ``jurisdiction`` read as a string), passes it through
    ``geom_id_to_parcel_id`` and maps ``jurisdiction_id`` to ``juris_name``
    via ``census_id_to_name.csv``.

    Parameters
    ----------
    parcels
        Forwarded unchanged to ``geom_id_to_parcel_id``.

    Returns
    -------
    pandas.DataFrame
    """
    geography_path = os.path.join(
        misc.data_dir(), "02_01_2016_parcels_geography.csv")
    geography = pd.read_csv(geography_path,
                            index_col="geom_id",
                            dtype={'jurisdiction': 'str'})
    geography = geom_id_to_parcel_id(geography, parcels)

    # lookup series: census id -> name
    names_path = os.path.join(misc.data_dir(), "census_id_to_name.csv")
    id_to_name = pd.read_csv(names_path, index_col="census_id").name10

    geography["juris_name"] = geography.jurisdiction_id.map(id_to_name)
    geography["pda_id"] = geography.pda_id.str.lower()
    return geography
Esempio n. 10
0
def local_pois(settings):
    """Precompute nearest-POI values on the walk network and save them.

    Because only one network at a time can hold POIs, and because of the
    large amount of memory used, this runs as a preprocessing step.  For the
    BART stations (``bart_stations.csv``) and the ``pacheights`` landmark it
    stores column ``1`` of ``nearest_pois`` per node and writes the result
    to ``local_poi_distances.csv`` with the index named ``node_id``.

    Parameters
    ----------
    settings
        Mapping; ``settings['build_networks']['walk']['name']`` names the
        network passed to ``make_network``.
    """
    search_radius = 3000
    walk_net = make_network(
        settings['build_networks']['walk']['name'],
        "weight", search_radius)
    walk_net.init_pois(num_categories=1, max_dist=search_radius, max_pois=1)

    def nearest(lng, lat):
        # the single "tmp" category is overwritten for every POI set
        walk_net.set_pois("tmp", lng, lat)
        return walk_net.nearest_pois(search_radius, "tmp", num_pois=1)[1]

    stations = pd.read_csv(
        os.path.join(misc.data_dir(), 'bart_stations.csv'))
    results = {"bartdist": nearest(stations.lng, stations.lat)}

    locname = 'pacheights'
    landmark = orca.get_table('landmarks').local.query(
        "name == '%s'" % locname)
    results["pacheights"] = nearest(landmark.lng, landmark.lat)

    out = pd.DataFrame(results)
    out.index.name = "node_id"
    out.to_csv('local_poi_distances.csv')
Esempio n. 11
0
def zoning_baseline(parcels, zoning_lookup):
    """Load baseline parcel zoning joined to the zoning lookup table.

    Reads ``2015_08_13_zoning_parcels.csv`` (indexed by ``geom_id``), merges
    it with ``zoning_lookup`` on ``zoning_id``, passes the result through
    ``geom_id_to_parcel_id`` and renames the two-letter building type
    columns to ``type<N>`` names.

    Parameters
    ----------
    parcels
        Forwarded unchanged to ``geom_id_to_parcel_id``.
    zoning_lookup
        Table wrapper exposing ``to_frame()``; joined on its index.

    Returns
    -------
    pandas.DataFrame
    """
    # two-letter building type code -> column name to use instead
    type_columns = {
        "HS": "type1",
        "HT": "type2",
        "HM": "type3",
        "OF": "type4",
        "HO": "type5",
        "IL": "type7",
        "IW": "type8",
        "IH": "type9",
        "RS": "type10",
        "RB": "type11",
        "MR": "type12",
        "MT": "type13",
        "ME": "type14",
    }

    zoning_path = os.path.join(misc.data_dir(),
                               "2015_08_13_zoning_parcels.csv")
    zoning = pd.read_csv(zoning_path, index_col="geom_id")
    zoning = pd.merge(zoning, zoning_lookup.to_frame(),
                      left_on="zoning_id", right_index=True)
    zoning = geom_id_to_parcel_id(zoning, parcels)

    # columns that are not building type codes keep their names
    zoning.columns = [type_columns.get(col, col) for col in zoning.columns]
    return zoning
Esempio n. 12
0
def parcels(store):
    """Return the parcels table with NAs filled and an ``sdem`` flag added.

    ``zone_id`` values of 0 are replaced with 1, ``utils.table_reprocess``
    is applied with a ``fill_nas`` config for ``zone_id`` and
    ``shape_area``, and ``sdem`` is set to 1 for parcels whose ``geom_id``
    appears in ``development_projects.csv`` (0 otherwise).

    Parameters
    ----------
    store
        Mapping-like object holding the ``'parcels'`` table.

    Returns
    -------
    pandas.DataFrame
    """
    parcel_df = store['parcels']
    parcel_df["zone_id"] = parcel_df.zone_id.replace(0, 1)

    fill_config = {
        "fill_nas": {
            "zone_id": {"how": "mode", "type": "int"},
            "shape_area": {"how": "median", "type": "float"},
        }
    }
    parcel_df = utils.table_reprocess(fill_config, parcel_df)

    # read the csv directly: going through the development projects table
    # would be a circular reference
    projects = pd.read_csv(
        os.path.join(misc.data_dir(), "development_projects.csv"))

    # mark parcels that are going to be developed by the sdem
    has_project = parcel_df.geom_id.isin(projects.geom_id)
    parcel_df["sdem"] = has_project.astype('int')

    return parcel_df
Esempio n. 13
0
def parcels(store):
    """Load parcels, fill missing values and flag parcels with projects.

    Takes ``store['parcels']``, replaces ``zone_id`` 0 with 1, runs
    ``utils.table_reprocess`` with a ``fill_nas`` config (mode for
    ``zone_id``, median for ``shape_area``) and adds an integer ``sdem``
    column marking parcels whose ``geom_id`` is listed in
    ``development_projects.csv``.

    Parameters
    ----------
    store
        Mapping-like object holding the ``'parcels'`` table.

    Returns
    -------
    pandas.DataFrame
    """
    table = store['parcels']
    table["zone_id"] = table.zone_id.replace(0, 1)

    zone_fill = {"how": "mode", "type": "int"}
    area_fill = {"how": "median", "type": "float"}
    table = utils.table_reprocess(
        {"fill_nas": {"zone_id": zone_fill, "shape_area": area_fill}},
        table)

    # have to read the file here because otherwise it's a circular reference
    projects_path = os.path.join(misc.data_dir(), "development_projects.csv")
    project_geom_ids = pd.read_csv(projects_path).geom_id

    # mark parcels that are going to be developed by the sdem
    table["sdem"] = table.geom_id.isin(project_geom_ids).astype('int')
    return table
Esempio n. 14
0
def parcels_geography(parcels):
    """Return the parcel geography table with ``juris_name`` attached.

    ``02_01_2016_parcels_geography.csv`` is read with ``geom_id`` as index
    and ``jurisdiction`` as a string, passed through
    ``geom_id_to_parcel_id``, given a ``juris_name`` column from
    ``census_id_to_name.csv`` and has ``pda_id`` lower-cased.

    Parameters
    ----------
    parcels
        Forwarded unchanged to ``geom_id_to_parcel_id``.

    Returns
    -------
    pandas.DataFrame
    """
    data_dir = misc.data_dir()
    raw = pd.read_csv(
        os.path.join(data_dir, "02_01_2016_parcels_geography.csv"),
        index_col="geom_id",
        dtype={'jurisdiction': 'str'})
    geography = geom_id_to_parcel_id(raw, parcels)

    # lookup series: census id -> name
    census_names = pd.read_csv(
        os.path.join(data_dir, "census_id_to_name.csv"),
        index_col="census_id")
    geography["juris_name"] = \
        geography.jurisdiction_id.map(census_names.name10)

    geography["pda_id"] = geography.pda_id.str.lower()
    return geography
Esempio n. 15
0
def make_network(name, weight_col, max_distance):
    """Build a precomputed pandana network from an HDF5 file.

    Parameters
    ----------
    name
        File name of the HDF5 store inside the data directory; it must hold
        ``nodes`` and ``edges`` tables.
    weight_col
        Name of the ``edges`` column used as the impedance.
    max_distance
        Horizon passed to ``Network.precompute``.

    Returns
    -------
    pdna.Network
    """
    # Both tables are read while the store is open, and the context manager
    # closes the file afterwards; previously the handle was never closed.
    with pd.HDFStore(os.path.join(misc.data_dir(), name), "r") as st:
        nodes, edges = st.nodes, st.edges
    net = pdna.Network(nodes["x"], nodes["y"], edges["from"], edges["to"],
                       edges[[weight_col]])
    net.precompute(max_distance)
    return net
Esempio n. 16
0
def make_network(name, weight_col, max_distance):
    """Create a pandana network from an HDF5 file and precompute it.

    Parameters
    ----------
    name
        File name of the HDF5 store inside the data directory; it must hold
        ``nodes`` and ``edges`` tables.
    weight_col
        Name of the ``edges`` column used as the impedance.
    max_distance
        Horizon passed to ``Network.precompute``.

    Returns
    -------
    pdna.Network
    """
    store_path = os.path.join(misc.data_dir(), name)
    # Close the store as soon as both tables are read; the original left the
    # file handle open for the life of the process.
    with pd.HDFStore(store_path, "r") as st:
        nodes = st.nodes
        edges = st.edges
    net = pdna.Network(nodes["x"], nodes["y"], edges["from"], edges["to"],
                       edges[[weight_col]])
    net.precompute(max_distance)
    return net
Esempio n. 17
0
def zoning_np(parcels_geography):
    """Left-join the ``zoning_mods_np.csv`` zoning changes onto parcels.

    Parameters
    ----------
    parcels_geography
        Table wrapper exposing ``to_frame()``; must include the
        ``jurisdiction``, ``pda_id``, ``tpp_id`` and ``exp_id`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per parcel geography row, with the matching zoning
        modification columns (missing where nothing matches).
    """
    join_keys = ['jurisdiction', 'pda_id', 'tpp_id', 'exp_id']
    zoning_mods = pd.read_csv(
        os.path.join(misc.data_dir(), 'zoning_mods_np.csv'))
    geography = parcels_geography.to_frame()
    return pd.merge(geography, zoning_mods, on=join_keys, how='left')
Esempio n. 18
0
def development_projects(parcels, settings):
    """Load and clean the development projects table.

    Reads ``development_projects.csv``, zeroes the price and residential
    sqft fields, fills missing values, maps ``building_type`` to
    ``building_type_id`` via ``settings["building_type_map2"]``, drops rows
    without a ``geom_id`` or ``year_built``, converts ``geom_id`` to parcel
    ids and keeps only building types ``<= 4`` or ``>= 7``.

    Parameters
    ----------
    parcels
        Forwarded unchanged to ``geom_id_to_parcel_id``.
    settings
        Mapping providing ``building_type_map2``.

    Returns
    -------
    pandas.DataFrame
    """
    df = pd.read_csv(
        os.path.join(misc.data_dir(), "development_projects.csv"))

    for fld in ['residential_sqft', 'residential_price',
                'non_residential_price']:
        df[fld] = 0
    df["redfin_sale_year"] = 2012  # hedonic doesn't tolerate nans
    df["stories"] = df.stories.fillna(1)
    df["building_sqft"] = df.building_sqft.fillna(0)
    df["non_residential_sqft"] = df.non_residential_sqft.fillna(0)
    df["building_type_id"] = \
        df.building_type.map(settings["building_type_map2"])

    df = df.dropna(subset=["geom_id"])  # need a geom_id to link to parcel_id

    df = df.dropna(subset=["year_built"])  # need a year built to get built

    df["geom_id"] = df.geom_id.astype("int")
    df = df.query('residential_units != "rent"')
    df["residential_units"] = df.residential_units.astype("int")
    df = df.set_index("geom_id")
    df = geom_id_to_parcel_id(df, parcels).reset_index()  # use parcel id

    # we don't predict prices for schools and hotels right now
    df = df.query("building_type_id <= 4 or building_type_id >= 7")

    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3
    print("Describe of development projects")
    print(df[orca.get_table('buildings').local_columns].describe())

    return df
Esempio n. 19
0
def build_networks(settings):
    """Build and precompute the pandana network described in ``settings``.

    Parameters
    ----------
    settings
        Mapping; ``settings["build_networks"]`` provides ``name`` (HDF5 file
        name in the data directory) and ``max_distance`` (precompute
        horizon).

    Returns
    -------
    pdna.Network
    """
    name = settings["build_networks"]["name"]
    # Read both tables, then let the context manager close the file; the
    # store used to be left open.
    with pd.HDFStore(os.path.join(misc.data_dir(), name), "r") as st:
        nodes, edges = st.nodes, st.edges
    net = pdna.Network(nodes["x"], nodes["y"], edges["from"], edges["to"],
                       edges[["weight"]])
    net.precompute(settings["build_networks"]["max_distance"])
    return net
Esempio n. 20
0
def local_pois(settings):
    """Write per-node nearest-POI values for BART and Pac Heights to csv.

    Because only one network at a time can hold POIs, and because of the
    large amount of memory used, this is a preprocessing step.  The walk
    network named in ``settings`` is built with ``make_network``; for each
    POI set, column ``1`` of ``nearest_pois`` is stored, and the combined
    table is written to ``local_poi_distances.csv`` indexed by ``node_id``.

    Parameters
    ----------
    settings
        Mapping; ``settings['build_networks']['walk']['name']`` names the
        network file.
    """
    max_dist = 3000
    network_name = settings['build_networks']['walk']['name']
    network = make_network(network_name, "weight", max_dist)
    network.init_pois(num_categories=1, max_dist=max_dist, max_pois=1)

    # BART stations come from a csv in the data directory
    bart = pd.read_csv(os.path.join(misc.data_dir(), 'bart_stations.csv'))
    network.set_pois("tmp", bart.lng, bart.lat)
    bart_result = network.nearest_pois(max_dist, "tmp", num_pois=1)[1]

    # the landmark comes from the orca "landmarks" table; it reuses the
    # "tmp" category, replacing the BART POIs
    locname = 'pacheights'
    landmarks = orca.get_table('landmarks').local
    selected = landmarks.query("name == '%s'" % locname)
    network.set_pois("tmp", selected.lng, selected.lat)
    landmark_result = network.nearest_pois(max_dist, "tmp", num_pois=1)[1]

    table = pd.DataFrame({"bartdist": bart_result,
                          "pacheights": landmark_result})
    table.index.name = "node_id"
    table.to_csv('local_poi_distances.csv')
Esempio n. 21
0
def build_networks(settings):
    """Create the pandana network configured under ``build_networks``.

    Parameters
    ----------
    settings
        Mapping; ``settings['build_networks']`` provides ``name`` (HDF5 file
        name in the data directory) and ``max_distance`` (precompute
        horizon).

    Returns
    -------
    pdna.Network
    """
    network_settings = settings['build_networks']
    store_path = os.path.join(misc.data_dir(), network_settings['name'])
    # The context manager closes the HDF5 file once the tables are read;
    # before, the handle was never released.
    with pd.HDFStore(store_path, "r") as st:
        nodes, edges = st.nodes, st.edges
    net = pdna.Network(nodes["x"], nodes["y"], edges["from"], edges["to"],
                       edges[["weight"]])
    net.precompute(network_settings['max_distance'])
    return net
Esempio n. 22
0
def non_mandatory_accessibility():
    """Load the non-mandatory logsum table indexed by ``taz_sub``.

    The file name comes from ``get_logsum_file('non_mandatory')``.  The
    ``subzone`` codes 0, 1 and 2 are recoded to ``'a'``, ``'b'`` and
    ``'c'``; ``taz_sub`` is the ``taz`` (as a string) followed by that
    letter.

    Returns
    -------
    pandas.DataFrame
    """
    logsum_file = get_logsum_file('non_mandatory')
    logsums = pd.read_csv(os.path.join(misc.data_dir(), logsum_file))

    # recode one value at a time, in this order
    for code, letter in ((0, 'a'), (1, 'b'), (2, 'c')):
        logsums.loc[logsums.subzone == code, 'subzone'] = letter

    logsums['taz_sub'] = logsums.taz.astype('str') + logsums.subzone
    return logsums.set_index('taz_sub')
Esempio n. 23
0
def non_mandatory_accessibility():
    """Load the non-mandatory logsum table indexed by ``taz_sub``.

    The file name comes from ``get_logsum_file('non_mandatory')``.  The
    numeric ``subzone`` codes are recoded to letters and ``taz_sub`` is the
    ``taz`` (as a string) followed by that letter.

    Returns
    -------
    pandas.DataFrame
    """
    subzone_letters = (
        (0, 'c'),  # no walk
        (1, 'a'),  # short walk
        (2, 'b'),  # long walk
    )
    logsum_path = os.path.join(misc.data_dir(),
                               get_logsum_file('non_mandatory'))
    logsums = pd.read_csv(logsum_path)

    for code, letter in subzone_letters:
        logsums.loc[logsums.subzone == code, 'subzone'] = letter

    logsums['taz_sub'] = logsums.taz.astype('str') + logsums.subzone
    return logsums.set_index('taz_sub')
Esempio n. 24
0
def zoning_baseline(parcels, zoning_lookup, settings):
    """Load baseline parcel zoning joined to the zoning lookup table.

    Reads ``2015_12_21_zoning_parcels.csv`` (indexed by ``geom_id``), merges
    it with ``zoning_lookup`` on ``zoning_id`` and passes the result through
    ``geom_id_to_parcel_id``.

    Parameters
    ----------
    parcels
        Forwarded unchanged to ``geom_id_to_parcel_id``.
    zoning_lookup
        Table wrapper exposing ``to_frame()``; joined on its index.
    settings
        Not used by this function.

    Returns
    -------
    pandas.DataFrame
    """
    zoning_path = os.path.join(misc.data_dir(),
                               "2015_12_21_zoning_parcels.csv")
    parcel_zoning = pd.read_csv(zoning_path, index_col="geom_id")
    joined = pd.merge(parcel_zoning, zoning_lookup.to_frame(),
                      left_on="zoning_id", right_index=True)
    return geom_id_to_parcel_id(joined, parcels)
Esempio n. 25
0
def zoning_baseline(parcels, zoning_lookup, settings):
    """Return parcel zoning merged with its lookup attributes.

    ``2015_12_21_zoning_parcels.csv`` is read with ``geom_id`` as index,
    merged with ``zoning_lookup.to_frame()`` (``zoning_id`` against the
    lookup's index) and then passed through ``geom_id_to_parcel_id``.

    Parameters
    ----------
    parcels
        Forwarded unchanged to ``geom_id_to_parcel_id``.
    zoning_lookup
        Table wrapper exposing ``to_frame()``.
    settings
        Not used by this function.

    Returns
    -------
    pandas.DataFrame
    """
    zoning = pd.read_csv(
        os.path.join(misc.data_dir(), "2015_12_21_zoning_parcels.csv"),
        index_col="geom_id")
    lookup_frame = zoning_lookup.to_frame()
    zoning = pd.merge(zoning, lookup_frame,
                      left_on="zoning_id", right_index=True)
    zoning = geom_id_to_parcel_id(zoning, parcels)
    return zoning
Esempio n. 26
0
def load_network_addons(network, file_name='PugetSoundNetworkAddons.h5'):
    """Attach per-node addon tables from an HDF5 file to ``network``.

    For every key in the store, the table is de-duplicated on ``node_id``,
    flagged with ``has_poi = True`` and left-joined onto the full list of
    network node ids.  The result, indexed by ``node_id``, is stored in
    ``network.addons`` under the key with its slashes removed.

    Parameters
    ----------
    network
        Object exposing ``node_ids``; its ``addons`` attribute is replaced.
    file_name
        HDF5 file name inside the data directory.

    Returns
    -------
    None
    """
    # the store is now closed on exit; it used to be left open
    with pd.HDFStore(os.path.join(misc.data_dir(), file_name), "r") as store:
        network.addons = {}
        node_ids = network.node_ids.values
        for key in store.keys():
            attr = key.replace('/', '')
            all_nodes = pd.DataFrame({"node_id": node_ids}, index=node_ids)
            pois = store[attr].drop_duplicates("node_id")
            # plain ``bool`` instead of the "bool8" alias, which newer NumPy
            # releases no longer provide
            pois["has_poi"] = np.ones(pois.shape[0], dtype=bool)
            merged = pd.merge(all_nodes, pois, how='left', on="node_id")
            network.addons[attr] = merged.set_index('node_id')
Esempio n. 27
0
def non_mandatory_accessibility():
    """Load the non-mandatory logsum table indexed by ``taz_sub``.

    The file name comes from ``get_logsum_file('non_mandatory')`` and is
    also registered as the ``nonmand_acc_file_2010`` injectable.  The
    numeric ``subzone`` codes are recoded to letters and ``taz_sub`` is the
    ``taz`` (as a string) followed by that letter.

    Returns
    -------
    pandas.DataFrame
    """
    logsum_file = get_logsum_file('non_mandatory')
    orca.add_injectable("nonmand_acc_file_2010", logsum_file)
    logsums = pd.read_csv(os.path.join(misc.data_dir(), logsum_file))

    subzone_letters = (
        (0, 'c'),  # no walk
        (1, 'a'),  # short walk
        (2, 'b'),  # long walk
    )
    for code, letter in subzone_letters:
        logsums.loc[logsums.subzone == code, 'subzone'] = letter

    logsums['taz_sub'] = logsums.taz.astype('str') + logsums.subzone
    return logsums.set_index('taz_sub')
 def craigslist():
     """Load the Craigslist listings with network node ids attached.

     Reads ``sfbay_craigslist.csv``, looks up the node id for each listing
     on the ``walk`` and ``drive`` networks of the ``net`` injectable, and
     fills missing ``bedrooms`` (with 1) and ``neighborhood`` (with ``''``).

     Returns
     -------
     pandas.DataFrame
     """
     listings = pd.read_csv(
         os.path.join(misc.data_dir(), "sfbay_craigslist.csv"))
     networks = orca.get_injectable('net')

     walk_net = networks['walk']
     listings['node_id'] = walk_net.get_node_ids(listings['lon'],
                                                 listings['lat'])
     drive_net = networks['drive']
     listings['tmnode_id'] = drive_net.get_node_ids(listings['lon'],
                                                    listings['lat'])

     # fill nans -- missing bedrooms are mostly studio apts
     listings['bedrooms'] = listings.bedrooms.replace(np.nan, 1)
     listings['neighborhood'] = listings.neighborhood.replace(np.nan, '')
     return listings
Esempio n. 29
0
 def craigslist():
     """Return the Craigslist listings table with node ids and nans filled.

     ``sfbay_craigslist.csv`` is read from the data directory; ``node_id``
     and ``tmnode_id`` come from ``get_node_ids`` on the ``walk`` and
     ``drive`` entries of the ``net`` injectable.

     Returns
     -------
     pandas.DataFrame
     """
     csv_path = os.path.join(misc.data_dir(), "sfbay_craigslist.csv")
     ads = pd.read_csv(csv_path)
     net = orca.get_injectable('net')

     # (new column, network key) pairs, assigned in this order
     for column, mode in (('node_id', 'walk'), ('tmnode_id', 'drive')):
         ads[column] = net[mode].get_node_ids(ads['lon'], ads['lat'])

     # fill nans -- missing bedrooms are mostly studio apts
     ads['bedrooms'] = ads.bedrooms.replace(np.nan, 1)
     ads['neighborhood'] = ads.neighborhood.replace(np.nan, '')
     return ads
Esempio n. 30
0
def zoning_lookup():
    """Load the zoning lookup table, one row per ``id``.

    Parcels and zoning are linked by string matching on zoning names, and
    some names differ slightly, so the csv repeats a lookup row once per
    name variant.  All attributes of rows sharing an ``id`` are the same
    (only the name differs), so duplicates of ``id`` are dropped in order
    to run in urbansim.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``id``.
    """
    lookup_path = os.path.join(misc.data_dir(), "zoning_lookup.csv")
    lookup = pd.read_csv(lookup_path)
    unique_rows = lookup.drop_duplicates(subset='id')
    return unique_rows.set_index('id')
Esempio n. 31
0
def verify():
    """Write the data structure yaml for the base year store and return it.

    Opens ``run4032_school_v2_baseyear.h5`` read-only, renders it with
    ``verify_data_structure.yaml_from_store`` and writes the text to
    ``configs/data_structure.yaml``.

    Returns
    -------
    pandas.HDFStore
        The store, still open.
    """
    store_path = os.path.join(misc.data_dir(),
                              "run4032_school_v2_baseyear.h5")
    hdf_store = pd.HDFStore(store_path, mode="r")

    structure_yaml = verify_data_structure.yaml_from_store(hdf_store)
    with open(r"configs/data_structure.yaml", "w") as yaml_file:
        yaml_file.write(structure_yaml)

    return hdf_store
Esempio n. 32
0
def zoning_lookup():
    """Return the zoning lookup table indexed by a unique ``id``.

    The csv holds several rows per ``id``: parcels and zoning are linked by
    string matching on zoning names, and rows are copied with slightly
    different name strings for the same lookup row.  Since all other
    attributes of rows sharing an ``id`` are the same, duplicates of ``id``
    are dropped so the table can be used in urbansim.

    Returns
    -------
    pandas.DataFrame
    """
    raw = pd.read_csv(os.path.join(misc.data_dir(), "zoning_lookup.csv"))
    deduplicated = raw.drop_duplicates(subset='id')
    indexed = deduplicated.set_index('id')
    return indexed
Esempio n. 33
0
def accessibilities_segmentation():
    """Compute the share of persons in each accessibility segment.

    Reads the file named by ``get_logsum_file('segmentation')``, labels each
    row ``<incQ_label>_<autoSuff_label>_<AV|noAV>``, sums by label and
    divides each label's ``num_persons`` by the overall total.

    Returns
    -------
    pandas.DataFrame
        A single row (index 0) with one column per label.
    """
    def av_label(has_av):
        return 'AV' if has_av == 1 else 'noAV'

    logsum_file = get_logsum_file('segmentation')
    segments = pd.read_csv(os.path.join(misc.data_dir(), logsum_file))

    segments['AV'] = segments['hasAV'].apply(av_label)
    income_auto = segments['incQ_label'] + '_' + segments['autoSuff_label']
    segments['label'] = income_auto + '_' + segments['AV']

    totals = segments.groupby('label').sum()
    totals['prop'] = totals['num_persons'] / totals['num_persons'].sum()

    # keep only the proportions, laid out as one row
    return totals[['prop']].transpose().reset_index(drop=True)
Esempio n. 34
0
def verify():
    """Write the data structure yaml for the SEMCOG store and return it.

    Opens ``all_semcog_data_02-02-18.h5`` read-only, renders it with
    ``verify_data_structure.yaml_from_store`` and writes the text to
    ``configs/data_structure.yaml``.

    Returns
    -------
    pandas.HDFStore
        The store, still open.
    """
    store_path = os.path.join(misc.data_dir(),
                              "all_semcog_data_02-02-18.h5")
    hdf_store = pd.HDFStore(store_path, mode="r")

    yaml_text = verify_data_structure.yaml_from_store(hdf_store)
    with open(r"configs/data_structure.yaml", "w") as target:
        target.write(yaml_text)

    return hdf_store
Esempio n. 35
0
def build_networks(parcels):
    """Build the pandana network and assign a ``node_id`` to each parcel.

    Reads ``nodes`` and ``edges`` from ``osm_sandag.h5``, precomputes the
    network with a horizon of 3000 and registers it as the ``net``
    injectable.  The parcels table is then re-registered with a ``node_id``
    column obtained from ``get_node_ids`` on the parcel ``x``/``y`` columns.

    Parameters
    ----------
    parcels
        Table wrapper exposing ``to_frame`` and ``local_columns``.
    """
    # The context manager closes the HDF5 file once both tables are read;
    # the handle used to stay open.
    with pd.HDFStore(os.path.join(misc.data_dir(), "osm_sandag.h5"),
                     "r") as st:
        nodes, edges = st.nodes, st.edges
    net = pdna.Network(nodes["x"], nodes["y"], edges["from"], edges["to"],
                       edges[["weight"]])
    net.precompute(3000)
    orca.add_injectable("net", net)

    p = parcels.to_frame(parcels.local_columns)
    p['node_id'] = net.get_node_ids(p['x'], p['y'])
    orca.add_table("parcels", p)
Esempio n. 36
0
def build_networks(parcels):
    """Register the ``net`` injectable and parcels with node ids.

    The network is built from the ``nodes`` and ``edges`` tables of
    ``osm_sandag.h5`` and precomputed with a horizon of 3000.  The local
    parcel columns are copied to a frame, given a ``node_id`` column from
    ``get_node_ids`` and re-registered as the ``parcels`` table.

    Parameters
    ----------
    parcels
        Table wrapper exposing ``to_frame`` and ``local_columns``.
    """
    store_path = os.path.join(misc.data_dir(), "osm_sandag.h5")
    # Release the file handle right after reading; the original never closed
    # the store.
    with pd.HDFStore(store_path, "r") as st:
        nodes = st.nodes
        edges = st.edges
    net = pdna.Network(nodes["x"], nodes["y"], edges["from"], edges["to"],
                       edges[["weight"]])
    net.precompute(3000)
    orca.add_injectable("net", net)

    p = parcels.to_frame(parcels.local_columns)
    p['node_id'] = net.get_node_ids(p['x'], p['y'])
    orca.add_table("parcels", p)
Esempio n. 37
0
def accessibilities_segmentation():
    """Return the proportion of persons per accessibility segment label.

    The segmentation file named by ``get_logsum_file('segmentation')`` is
    labelled ``<incQ_label>_<autoSuff_label>_<AV|noAV>`` (``AV`` when
    ``hasAV`` equals 1), summed by label and normalized by the total
    ``num_persons``.

    Returns
    -------
    pandas.DataFrame
        A single row (index 0) with one column per label.
    """
    segmentation_path = os.path.join(misc.data_dir(),
                                     get_logsum_file('segmentation'))
    table = pd.read_csv(segmentation_path)

    table['AV'] = table['hasAV'].apply(
        lambda flag: 'AV' if flag == 1 else 'noAV')
    table['label'] = (table['incQ_label'] + '_' +
                      table['autoSuff_label'] + '_' +
                      table['AV'])

    by_label = table.groupby('label').sum()
    person_total = by_label['num_persons'].sum()
    by_label['prop'] = by_label['num_persons'] / person_total

    proportions = by_label[['prop']].transpose()
    return proportions.reset_index(drop=True)
Esempio n. 38
0
def zoning_baseline(parcels, zoning_lookup, settings):
    """Load the hybrid PBA50 parcel zoning joined to the zoning lookup.

    Prints the path of ``2020_11_05_zoning_parcels_hybrid_pba50.csv``, reads
    it with ``geom_id`` as index, merges it with ``zoning_lookup`` on
    ``zoning_id`` and passes the result through ``geom_id_to_parcel_id``.

    Parameters
    ----------
    parcels
        Forwarded unchanged to ``geom_id_to_parcel_id``.
    zoning_lookup
        Table wrapper exposing ``to_frame()``; joined on its index.
    settings
        Not used by this function.

    Returns
    -------
    pandas.DataFrame
    """
    zoning_path = os.path.join(
        misc.data_dir(), "2020_11_05_zoning_parcels_hybrid_pba50.csv")
    print('Version of zoning_parcels: {}'.format(zoning_path))

    parcel_zoning = pd.read_csv(zoning_path, index_col="geom_id")
    joined = pd.merge(parcel_zoning, zoning_lookup.to_frame(),
                      left_on="zoning_id", right_index=True)
    return geom_id_to_parcel_id(joined, parcels)
Esempio n. 39
0
def load_network(precompute=None, file_name='PugetSoundNetwork.h5'):
    """Load an OSM network from an HDF5 file into a pandana network.

    Parameters
    ----------
    precompute
        Optional iterable of distances; ``Network.precompute`` is called
        once for each.  ``None`` skips precomputation.
    file_name
        HDF5 file name inside the data directory; it must hold ``nodes``
        and ``edges`` tables.

    Returns
    -------
    pdna.Network
    """
    # Load OSM from the hdf5 file.  The context manager closes it after both
    # tables are read; the store used to be left open.
    with pd.HDFStore(os.path.join(misc.data_dir(), file_name), "r") as store:
        nodes = store.nodes
        edges = store.edges
    nodes.index.name = "index"  # something that Synthicity wanted to fix
    # create the network
    net = pdna.Network(nodes["x"], nodes["y"], edges["from"], edges["to"],
                       edges[["distance"]])
    if precompute is not None:
        for dist in precompute:
            net.precompute(dist)
    return net
Esempio n. 40
0
def development_projects(parcels, settings, scenario):
    """Load the development projects that apply to ``scenario``.

    Reads ``development_projects.csv``, runs ``reprocess_dev_projects``,
    keeps ``add``/``build`` actions whose ``scen<scenario>`` flag is set,
    fills and normalizes fields, converts ``geom_id`` to parcel ids (keeping
    a ``geom_id`` column) and keeps only building types ``<= 4`` or
    ``>= 7``.

    Parameters
    ----------
    parcels
        Forwarded unchanged to ``geom_id_to_parcel_id``.
    settings
        Mapping providing ``building_type_map2``.
    scenario
        Formatted into the ``scen<scenario>`` column name.

    Returns
    -------
    pandas.DataFrame
    """
    df = pd.read_csv(
        os.path.join(misc.data_dir(), "development_projects.csv"))
    df = reprocess_dev_projects(df)

    df = df[df.action.isin(["add", "build"])]

    # this filters project by scenario
    colname = "scen%s" % scenario
    # df[colname] is 1s and 0s indicating whether to include it
    # this used to be an optional filter but it is now required so that we
    # don't accidentally include all the development projects, since
    # scenario-based dev projects are used pretty extensively
    df = df[df[colname].astype('bool')]

    # need a geom_id to link to parcel_id (geom_id is not modified before
    # the conversion below, so dropping once is enough)
    df = df.dropna(subset=['geom_id'])

    for fld in ['residential_sqft', 'residential_price',
                'non_residential_price']:
        df[fld] = 0
    df["redfin_sale_year"] = 2012  # hedonic doesn't tolerate nans
    df["stories"] = df.stories.fillna(1)
    df["building_sqft"] = df.building_sqft.fillna(0)
    df["non_residential_sqft"] = df.non_residential_sqft.fillna(0)

    # HP, GV and SC are all treated as OF
    df["building_type"] = df.building_type.replace(["HP", "GV", "SC"], "OF")
    df["building_type_id"] = \
        df.building_type.map(settings["building_type_map2"])

    df = df.dropna(subset=["year_built"])  # need a year built to get built

    df["geom_id"] = df.geom_id.astype("int")
    df = df.query('residential_units != "rent"')
    df["residential_units"] = df.residential_units.fillna(0).astype("int")
    geom_id = df.geom_id
    df = df.set_index("geom_id")
    df = geom_id_to_parcel_id(df, parcels).reset_index()  # use parcel id
    df["geom_id"] = geom_id.values  # add it back again cause it goes away

    # we don't predict prices for schools and hotels right now
    df = df.query("building_type_id <= 4 or building_type_id >= 7")

    df["deed_restricted_units"] = 0

    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3
    print("Describe of development projects")
    print(df[orca.get_table('buildings').local_columns].describe())

    return df
Esempio n. 41
0
def zoning_baseline(parcels, zoning_lookup, settings):
    """Load baseline parcel zoning with building type columns renamed.

    Reads ``2015_12_21_zoning_parcels.csv`` (indexed by ``geom_id``), merges
    it with ``zoning_lookup`` on ``zoning_id``, passes the result through
    ``geom_id_to_parcel_id`` and renames every column that is a key of
    ``settings["building_type_map2"]`` to ``type<N>``.

    Parameters
    ----------
    parcels
        Forwarded unchanged to ``geom_id_to_parcel_id``.
    zoning_lookup
        Table wrapper exposing ``to_frame()``; joined on its index.
    settings
        Mapping providing ``building_type_map2`` (code -> integer id).

    Returns
    -------
    pandas.DataFrame
    """
    zoning_path = os.path.join(misc.data_dir(),
                               "2015_12_21_zoning_parcels.csv")
    zoning = pd.read_csv(zoning_path, index_col="geom_id")
    zoning = pd.merge(zoning, zoning_lookup.to_frame(),
                      left_on="zoning_id", right_index=True)
    zoning = geom_id_to_parcel_id(zoning, parcels)

    # building type code -> "type<N>" column name
    type_columns = {}
    for code, type_id in settings["building_type_map2"].items():
        type_columns[code] = "type%d" % type_id

    zoning.columns = [type_columns.get(col, col) for col in zoning.columns]
    return zoning
Esempio n. 42
0
def zoning_baseline(parcels, zoning_lookup, settings):
    """Return parcel zoning joined to the lookup, with ``type<N>`` columns.

    ``2015_12_21_zoning_parcels.csv`` is read with ``geom_id`` as index,
    merged with ``zoning_lookup.to_frame()`` on ``zoning_id`` and passed
    through ``geom_id_to_parcel_id``.  Columns named after a key of
    ``settings["building_type_map2"]`` are renamed to ``type<N>``; all
    other columns keep their names.

    Parameters
    ----------
    parcels
        Forwarded unchanged to ``geom_id_to_parcel_id``.
    zoning_lookup
        Table wrapper exposing ``to_frame()``.
    settings
        Mapping providing ``building_type_map2`` (code -> integer id).

    Returns
    -------
    pandas.DataFrame
    """
    raw = pd.read_csv(
        os.path.join(misc.data_dir(), "2015_12_21_zoning_parcels.csv"),
        index_col="geom_id")
    joined = pd.merge(raw, zoning_lookup.to_frame(),
                      left_on="zoning_id", right_index=True)
    result = geom_id_to_parcel_id(joined, parcels)

    building_types = settings["building_type_map2"]
    renames = dict(
        (code, "type%d" % type_id)
        for code, type_id in building_types.items())

    result.columns = [renames.get(name, name) for name in result.columns]
    return result
Esempio n. 43
0
def development_projects(parcels, settings, scenario):
    """Load the scenario's development projects, cleaned for the model.

    Reads ``development_projects.csv``, runs ``reprocess_dev_projects``,
    keeps ``add``/``build`` actions flagged in the ``scen<scenario>``
    column, fills and normalizes fields, converts ``geom_id`` to parcel ids
    (keeping a ``geom_id`` column) and keeps only building types ``<= 4``
    or ``>= 7``.

    Parameters
    ----------
    parcels
        Forwarded unchanged to ``geom_id_to_parcel_id``.
    settings
        Mapping providing ``building_type_map2``.
    scenario
        Formatted into the ``scen<scenario>`` column name.

    Returns
    -------
    pandas.DataFrame
    """
    df = pd.read_csv(
        os.path.join(misc.data_dir(), "development_projects.csv"))
    df = reprocess_dev_projects(df)

    df = df[df.action.isin(["add", "build"])]

    # this filters project by scenario
    colname = "scen%s" % scenario
    # df[colname] is 1s and 0s indicating whether to include it
    # this used to be an optional filter but it is now required so that we
    # don't accidentally include all the development projects, since
    # scenario-based dev projects are used pretty extensively
    df = df[df[colname].astype('bool')]

    # need a geom_id to link to parcel_id; the second identical dropna the
    # function used to do further down was a no-op and has been removed
    df = df.dropna(subset=['geom_id'])

    for fld in ['residential_sqft', 'residential_price',
                'non_residential_price']:
        df[fld] = 0
    df["redfin_sale_year"] = 2012  # hedonic doesn't tolerate nans
    df["stories"] = df.stories.fillna(1)
    df["building_sqft"] = df.building_sqft.fillna(0)
    df["non_residential_sqft"] = df.non_residential_sqft.fillna(0)

    # HP, GV and SC are all treated as OF
    df["building_type"] = df.building_type.replace(["HP", "GV", "SC"], "OF")
    df["building_type_id"] = \
        df.building_type.map(settings["building_type_map2"])

    df = df.dropna(subset=["year_built"])  # need a year built to get built

    df["geom_id"] = df.geom_id.astype("int")
    df = df.query('residential_units != "rent"')
    df["residential_units"] = df.residential_units.fillna(0).astype("int")
    geom_id = df.geom_id
    df = df.set_index("geom_id")
    df = geom_id_to_parcel_id(df, parcels).reset_index()  # use parcel id
    df["geom_id"] = geom_id.values  # add it back again cause it goes away

    # we don't predict prices for schools and hotels right now
    df = df.query("building_type_id <= 4 or building_type_id >= 7")

    df["deed_restricted_units"] = 0

    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3
    print("Describe of development projects")
    print(df[orca.get_table('buildings').local_columns].describe())

    return df
Esempio n. 44
0
def costar(store, parcels):
    """Load CoStar listings and assign each one a parcel id.

    Reads ``2015_08_29_costar.csv``, folds ``General Retail`` into
    ``Retail``, keeps Office/Retail/Industrial rows, derives
    ``costar_rent`` and ``year_built`` (missing years become 1980) and
    drops rows without rent or coordinates.  ``parcel_id`` comes from
    ``nearest_neighbor`` on the parcel ``x``/``y`` and listing
    ``Longitude``/``Latitude``.

    Parameters
    ----------
    store
        Not used by this function.
    parcels
        Table wrapper exposing ``to_frame``.

    Returns
    -------
    pandas.DataFrame
    """
    listings = pd.read_csv(
        os.path.join(misc.data_dir(), '2015_08_29_costar.csv'))

    listings["PropertyType"] = listings.PropertyType.replace(
        "General Retail", "Retail")
    kept_types = ["Office", "Retail", "Industrial"]
    listings = listings[listings.PropertyType.isin(kept_types)]

    listings["costar_rent"] = \
        listings["Average Weighted Rent"].astype('float')
    listings["year_built"] = listings["Year Built"].fillna(1980)
    listings = listings.dropna(
        subset=["costar_rent", "Latitude", "Longitude"])

    # now assign parcel id
    parcel_xy = parcels.to_frame(['x', 'y']).dropna(subset=['x', 'y'])
    listing_xy = listings[['Longitude', 'Latitude']]
    listings["parcel_id"] = nearest_neighbor(parcel_xy, listing_xy)

    return listings
Esempio n. 45
0
def costar(store, parcels):
    """Return cleaned CoStar listings with a ``parcel_id`` column.

    ``2015_08_29_costar.csv`` is filtered to Office/Retail/Industrial
    (after renaming ``General Retail`` to ``Retail``), given float
    ``costar_rent`` and a ``year_built`` defaulting to 1980, and stripped of
    rows lacking rent or coordinates before ``nearest_neighbor`` assigns a
    parcel id.

    Parameters
    ----------
    store
        Not used by this function.
    parcels
        Table wrapper exposing ``to_frame``.

    Returns
    -------
    pandas.DataFrame
    """
    csv_path = os.path.join(misc.data_dir(), '2015_08_29_costar.csv')
    costar_df = pd.read_csv(csv_path)

    property_type = costar_df.PropertyType.replace("General Retail",
                                                   "Retail")
    costar_df["PropertyType"] = property_type
    is_modeled_type = costar_df.PropertyType.isin(
        ["Office", "Retail", "Industrial"])
    costar_df = costar_df[is_modeled_type]

    rent = costar_df["Average Weighted Rent"]
    costar_df["costar_rent"] = rent.astype('float')
    costar_df["year_built"] = costar_df["Year Built"].fillna(1980)

    required = ["costar_rent", "Latitude", "Longitude"]
    costar_df = costar_df.dropna(subset=required)

    # now assign parcel id
    located_parcels = parcels.to_frame(['x', 'y']).dropna(subset=['x', 'y'])
    costar_df["parcel_id"] = nearest_neighbor(
        located_parcels, costar_df[['Longitude', 'Latitude']])

    return costar_df
Esempio n. 46
0
def update_sqftproforma(default_settings, yaml_file, proforma_uses, **kwargs):
    """Overwrite attributes of a proforma settings object from a uses table.

    Builds a dictionary of settings (uses, residential flags, price
    coefficients, building forms and their ratios) from ``proforma_uses``
    and from the price coefficient csv in the data directory, then sets
    each entry as an attribute on ``default_settings``.

    Parameters
    ----------
    default_settings
        Settings object exposing ``uses``.  It is mutated in place and is
        the object that is passed on to ``update_sqftproforma_reference``.
    yaml_file
        Passed to ``yaml_to_dict``; only ``percent_of_max_profit`` is read
        from the result (default 100).
    proforma_uses
        DataFrame with the columns read below: ``building_type_id``,
        ``building_type_name``, ``is_residential``, ``template_id``,
        ``description``, ``percent_building_sqft``,
        ``generic_land_use_type_id`` and ``density_type``.
    **kwargs
        Accepted but not used in this function.

    Returns
    -------
    The value returned by ``update_sqftproforma_reference``.
    """
    # extract uses
    blduses = proforma_uses[["building_type_id", "building_type_name", "is_residential"]].drop_duplicates()
    # put uses into the same order as the config file
    blduses = pd.merge(pd.DataFrame({"uses":default_settings.uses}), blduses, left_on="uses", right_on="building_type_name")
    # store in a dictionary
    local_settings = {}
    local_settings["uses"] = blduses.uses.values
    local_settings["residential_uses"] = blduses.is_residential
    # NOTE(review): this sets the index on the Series obtained from blduses;
    # confirm whether that Series is shared with blduses itself
    local_settings["residential_uses"].index = blduses.building_type_id
    # get coefficient file for modeling price
    coeffile = os.path.join(misc.data_dir(), "expected_sales_unit_price_component_model_coefficients.csv")
    coefs = pd.read_csv(coeffile)
    # attach the building type name to each coefficient row (sub_model_id is
    # matched against building_type_id)
    coefs = pd.merge(coefs, proforma_uses[['building_type_name', "building_type_id"]].drop_duplicates(), right_on="building_type_id", left_on="sub_model_id", how="left")
    local_settings["price_coefs"] = coefs

    # Assemble forms: one entry per template_id, keyed by the description of
    # the template's first row
    forms = {}
    form_glut = {}
    form_density_type = {}
    for formid in np.unique(proforma_uses.template_id):
        subuse = proforma_uses[proforma_uses.template_id==formid]
        # left merge keeps one row per use in blduses order; uses missing
        # from this template get a share of 0 below
        submerge = pd.merge(blduses, subuse, on='building_type_name', how="left")
        form_name = subuse.description.values[0]
        forms[form_name] = submerge.percent_building_sqft.fillna(0).values/100.
        form_glut[form_name] = subuse.generic_land_use_type_id.values[0]
        form_density_type[form_name] = subuse.density_type.values[0]

    # Conversion similar to sqftproforma._convert_types()
    local_settings["res_ratios"] = {}
    for form in forms.keys():
        forms[form] /= forms[form].sum() # normalize
        # sum of the shares at the positions where residential_uses is truthy
        local_settings["res_ratios"][form] = pd.Series(forms[form][np.where(local_settings["residential_uses"])]).sum()

    all_default_settings = yaml_to_dict(None, yaml_file)
    local_settings["forms"] = forms
    local_settings["forms_df"] = pd.DataFrame(forms, index = local_settings["uses"]).transpose()
    local_settings["form_glut"] = form_glut
    local_settings["form_density_type"] = form_density_type
    local_settings["forms_to_test"] = None
    local_settings['percent_of_max_profit'] = all_default_settings.get('percent_of_max_profit', 100)
    # pf is the same object as default_settings, so the caller's object is
    # modified by the setattr calls below
    pf = default_settings
    for attr in local_settings.keys():
        setattr(pf, attr, local_settings[attr])
    pf.reference_dict = sqftproforma.SqFtProFormaReference(**pf.__dict__).reference_dict

    pf = update_sqftproforma_reference(pf)
    return pf
Esempio n. 47
0
def taz_geography(superdistricts):
    """Load the TAZ geography table and label each zone with a subregion.

    Reads ``taz_geography.csv`` from ``misc.data_dir()`` with the ``zone``
    column as index, then adds two columns:

    * ``subregion_id`` -- taken from ``superdistricts.subregion``, looked
      up by label with each row's ``superdistrict`` value.
    * ``subregion`` -- the name mapped from ``subregion_id`` (1 "Core",
      2 "Urban", 3 "Suburban", 4 "Rural").

    Parameters
    ----------
    superdistricts : table-like
        Object exposing a ``subregion`` Series indexed by superdistrict id.

    Returns
    -------
    pandas.DataFrame
        The csv contents plus the two subregion columns.
    """
    subregion_names = {
        1: "Core",
        2: "Urban",
        3: "Suburban",
        4: "Rural"
    }

    csv_path = os.path.join(misc.data_dir(), "taz_geography.csv")
    zones = pd.read_csv(csv_path, index_col="zone")

    # "subregion" is stored on the superdistricts table, so fetch it by
    # superdistrict id; .values discards the superdistrict labels so the
    # assignment lines up row-by-row with the zone index
    subregion_lookup = superdistricts.subregion
    zones["subregion_id"] = subregion_lookup.loc[zones.superdistrict].values
    zones["subregion"] = zones.subregion_id.map(subregion_names)
    return zones
Esempio n. 48
0
def zoning_scenario(parcels_geography, scenario, settings):
    """Attach the scenario's zoning modifications to every parcel.

    Reads ``zoning_mods_<scenario>.csv`` from ``misc.data_dir()`` (with the
    ``jurisdiction`` column read as string).  For each ``name -> number``
    entry of ``settings["building_type_map2"]`` a ``type<number>`` column
    is added holding the result of testing the row's ``add_bldg`` text for
    ``name``.  The result is left-merged onto the parcels geography frame
    on ``zoningmodcat``.

    Parameters
    ----------
    parcels_geography : table-like
        Object whose ``to_frame()`` yields a frame that has ``parcel_id``
        available after ``reset_index()`` and a ``zoningmodcat`` column.
    scenario : str or int
        Interpolated into the csv file name.
    settings : mapping
        Must contain ``"building_type_map2"``.

    Returns
    -------
    pandas.DataFrame
        Merged frame indexed by ``parcel_id``.
    """
    mods_path = os.path.join(misc.data_dir(),
                             'zoning_mods_%s.csv' % scenario)
    zoning_mods = pd.read_csv(mods_path, dtype={'jurisdiction': 'str'})

    # one flag column per building type, named after its numeric id
    for bldg_name, type_num in settings["building_type_map2"].items():
        flag_col = "type%d" % type_num
        zoning_mods[flag_col] = zoning_mods.add_bldg.str.contains(bldg_name)

    parcel_geog = parcels_geography.to_frame().reset_index()
    merged = pd.merge(parcel_geog,
                      zoning_mods,
                      on=['zoningmodcat'],
                      how='left')
    return merged.set_index('parcel_id')
Esempio n. 49
0
def taz_geography(superdistricts):
    """Return the TAZ geography table with subregion id and name added.

    The table comes from ``taz_geography.csv`` in ``misc.data_dir()``,
    indexed by ``zone``.  ``subregion_id`` is looked up in
    ``superdistricts.subregion`` via each zone's ``superdistrict`` value,
    and ``subregion`` is the name mapped from that id.

    Parameters
    ----------
    superdistricts : table-like
        Object exposing a ``subregion`` Series indexed by superdistrict id.

    Returns
    -------
    pandas.DataFrame
    """
    taz = pd.read_csv(os.path.join(misc.data_dir(), "taz_geography.csv"),
                      index_col="zone")

    # the subregion is only recorded per superdistrict, so bring it over
    # with a label lookup and assign the raw values positionally
    district_ids = taz.superdistrict
    taz["subregion_id"] = superdistricts.subregion.loc[district_ids].values

    id_to_name = {1: "Core", 2: "Urban", 3: "Suburban", 4: "Rural"}
    taz["subregion"] = taz.subregion_id.map(id_to_name)
    return taz
Esempio n. 50
0
def demolish_events(parcels, settings, scenario):
    """Return the demolish/build development projects for a scenario.

    Loads ``development_projects.csv`` from ``misc.data_dir()``, passes it
    through ``reprocess_dev_projects``, optionally filters it by scenario,
    keeps only rows whose ``action`` is ``"demolish"`` or ``"build"``, and
    drops rows lacking a ``geom_id``.

    Parameters
    ----------
    parcels : table-like
        Handed to ``geom_id_to_parcel_id`` together with the projects.
    settings : mapping
        Not used by this function; kept for the call signature.
    scenario : str
        If a column with this name exists, its values are cast to bool
        and used as the row filter.

    Returns
    -------
    pandas.DataFrame
        Result of ``geom_id_to_parcel_id`` with its index reset.
    """
    csv_path = os.path.join(misc.data_dir(), "development_projects.csv")
    projects = reprocess_dev_projects(pd.read_csv(csv_path))

    # a column named after the scenario holds 1/0 include flags
    if scenario in projects:
        include = projects[scenario].astype('bool')
        projects = projects[include]

    # only demolish and build records are of interest here
    wanted_actions = ["demolish", "build"]
    projects = projects[projects.action.isin(wanted_actions)]

    # rows without a geom_id cannot be tied to a parcel
    projects = projects.dropna(subset=['geom_id']).set_index("geom_id")

    # presumably swaps the geom_id index for parcel ids -- see helper
    return geom_id_to_parcel_id(projects, parcels).reset_index()
Esempio n. 51
0
def demolish_events(parcels, settings, scenario):
    """Build the table of demolish and build events from dev projects.

    Steps: read ``development_projects.csv`` from ``misc.data_dir()``, run
    ``reprocess_dev_projects`` on it, apply the per-scenario include
    column when one exists, keep ``action`` values ``"demolish"`` and
    ``"build"``, discard rows with a null ``geom_id``, then hand the
    ``geom_id``-indexed frame to ``geom_id_to_parcel_id``.

    Parameters
    ----------
    parcels : table-like
        Forwarded to ``geom_id_to_parcel_id``.
    settings : mapping
        Unused here; retained so the signature stays the same.
    scenario : str
        Name of an optional 1/0 column selecting rows for this scenario.

    Returns
    -------
    pandas.DataFrame
        The helper's output after ``reset_index()``.
    """
    raw = pd.read_csv(
        os.path.join(misc.data_dir(), "development_projects.csv"))
    events = reprocess_dev_projects(raw)

    # filter projects by scenario when the file carries a flag column
    if scenario in events:
        events = events[events[scenario].astype('bool')]

    is_demolish_or_build = events.action.isin(["demolish", "build"])
    events = events[is_demolish_or_build]

    events = events.dropna(subset=['geom_id'])
    by_geom = events.set_index("geom_id")

    # use parcel id (helper semantics defined elsewhere in the file)
    by_parcel = geom_id_to_parcel_id(by_geom, parcels)
    return by_parcel.reset_index()
Esempio n. 52
0
def maz_forecast_inputs(regional_demographic_forecast):
    """Load MAZ forecast inputs and fill missing 2010 household-size shares.

    Reads ``maz_forecast_inputs.csv`` from ``misc.data_dir()`` indexed by
    ``MAZ`` and turns every ``'#DIV/0!'`` cell into NaN.  Each null entry
    in the four 2010 share columns is then replaced by the matching value
    from the first ``year == 2010`` row of the regional forecast, and the
    four columns are cast to float.

    Parameters
    ----------
    regional_demographic_forecast : table-like
        Object whose ``to_frame()`` has a ``year`` column and columns
        ``shrs1`` .. ``shrs4``.

    Returns
    -------
    pandas.DataFrame
    """
    rdf = regional_demographic_forecast.to_frame()
    csv_path = os.path.join(misc.data_dir(), "maz_forecast_inputs.csv")
    mfi = pd.read_csv(csv_path, index_col='MAZ').replace('#DIV/0!', np.nan)

    # (csv column, regional column); the fourth csv name really is
    # missing the 'r'
    share_pairs = [
        ('shrs1_2010', 'shrs1'),
        ('shrs2_2010', 'shrs2'),
        ('shrs3_2010', 'shrs3'),
        ('shs4_2010', 'shrs4'),
    ]

    # apply regional share of hh by size to MAZs with no households in 2010
    for maz_col, regional_col in share_pairs:
        regional_share = rdf.loc[rdf.year == 2010, regional_col].values[0]
        mfi.loc[mfi[maz_col].isnull(), maz_col] = regional_share

    maz_cols = [maz_col for maz_col, _ in share_pairs]
    mfi[maz_cols] = mfi[maz_cols].astype('float')
    return mfi
Esempio n. 53
0
def zoning_scenario(parcels_geography, scenario, settings):
    """Join zoning modifications, with add/drop flags, onto parcels.

    Reads ``zoning_mods_<scenario>.csv`` from ``misc.data_dir()`` (with
    ``jurisdiction`` read as string).  For every ``name -> number`` entry
    of ``settings["building_type_map2"]`` two columns are created:

    * ``add-type<number>``  -- ``add_bldg`` text tested for ``name``
    * ``drop-type<number>`` -- ``drop_bldg`` (cast to str first) tested
      for ``name``

    All ``add-`` columns are created before any ``drop-`` column.  The
    frame is then left-merged onto the parcels geography on
    ``zoningmodcat``.

    Returns
    -------
    pandas.DataFrame
        Merged frame indexed by ``parcel_id``.
    """
    zoning_mods = pd.read_csv(
        os.path.join(misc.data_dir(), 'zoning_mods_%s.csv' % scenario),
        dtype={'jurisdiction': 'str'})

    # (building type name, "type<N>" suffix) in the map's own order
    type_suffixes = [
        (bldg_name, "type%d" % type_num)
        for bldg_name, type_num in settings["building_type_map2"].items()
    ]

    for bldg_name, suffix in type_suffixes:
        added = zoning_mods.add_bldg.str.contains(bldg_name)
        zoning_mods['add-' + suffix] = added

    for bldg_name, suffix in type_suffixes:
        # drop_bldg is stringified first, unlike add_bldg above
        drop_text = zoning_mods.drop_bldg.astype(str)
        zoning_mods['drop-' + suffix] = drop_text.str.contains(bldg_name)

    parcel_geog = parcels_geography.to_frame().reset_index()
    merged = pd.merge(parcel_geog,
                      zoning_mods,
                      on=['zoningmodcat'],
                      how='left')
    return merged.set_index('parcel_id')
Esempio n. 54
0
def maz_forecast_inputs(regional_demographic_forecast):
    """Return MAZ forecast inputs with null 2010 hh-size shares filled in.

    The csv ``maz_forecast_inputs.csv`` (from ``misc.data_dir()``, indexed
    by ``MAZ``) has its ``'#DIV/0!'`` cells replaced by NaN.  Nulls in the
    2010 share columns are then overwritten with the corresponding value
    from the first regional forecast row where ``year == 2010``; finally
    those columns are converted to float.

    Parameters
    ----------
    regional_demographic_forecast : table-like
        ``to_frame()`` must provide ``year`` and ``shrs1`` .. ``shrs4``.

    Returns
    -------
    pandas.DataFrame
    """
    rdf = regional_demographic_forecast.to_frame()
    mfi = pd.read_csv(
        os.path.join(misc.data_dir(), "maz_forecast_inputs.csv"),
        index_col='MAZ')
    mfi = mfi.replace('#DIV/0!', np.nan)

    regional_names = ['shrs1', 'shrs2', 'shrs3', 'shrs4']
    # the fourth category here is missing the 'r' in the csv
    maz_names = ['shrs1_2010', 'shrs2_2010', 'shrs3_2010', 'shs4_2010']

    # apply regional share of hh by size to MAZs with no households in 2010
    for maz_name, regional_name in zip(maz_names, regional_names):
        fill_value = rdf.loc[rdf.year == 2010, regional_name].values[0]
        needs_fill = mfi[maz_name].isnull()
        mfi.loc[needs_fill, maz_name] = fill_value

    mfi[maz_names] = mfi[maz_names].astype('float')
    return mfi
Esempio n. 55
0
def zoning_scenario(parcels_geography, scenario, settings):
    """Join per-building-type add/drop zoning flags onto parcels.

    Reads ``zoning_mods_<scenario>.csv`` from ``misc.data_dir()``.  A
    column is created for every key of ``settings["building_type_map"]``
    and initialised to NaN.  Each row's ``add_bldg`` text is split on
    whitespace and the named columns are set to 1 for that row; the same
    is then done for ``drop_bldg`` with 0, so a type listed in both ends
    up as 0.  Non-string cells (e.g. NaN) are skipped.  The frame is
    finally left-merged onto the parcels geography on ``zoningmodcat``.

    Parameters
    ----------
    parcels_geography : table-like
        Object whose ``to_frame()`` yields a frame that has ``parcel_id``
        available after ``reset_index()`` and a ``zoningmodcat`` column.
    scenario : str or int
        Interpolated into the csv file name.
    settings : mapping
        Must contain ``"building_type_map"``.

    Returns
    -------
    pandas.DataFrame
        Merged frame indexed by ``parcel_id``.
    """
    scenario_zoning = pd.read_csv(
        os.path.join(misc.data_dir(), 'zoning_mods_%s.csv' % scenario))

    # NaN marks building types the scenario neither adds nor drops
    for k in settings["building_type_map"]:
        scenario_zoning[k] = np.nan

    def add_drop_helper(col, val):
        # Series.items() is used because Series.iteritems() was removed
        # in pandas 2.0
        for ind, item in scenario_zoning[col].items():
            if not isinstance(item, str):
                continue
            for btype in item.split():
                scenario_zoning.loc[ind, btype] = val

    # drops are applied after adds, so they win on conflict
    add_drop_helper("add_bldg", 1)
    add_drop_helper("drop_bldg", 0)

    return pd.merge(parcels_geography.to_frame().reset_index(),
                    scenario_zoning,
                    on=['zoningmodcat'],
                    how='left').set_index('parcel_id')
Esempio n. 56
0
def zoning_scenario(parcels_geography, scenario, settings):
    """Merge the scenario's add/drop building-type flags onto parcels.

    The zoning modifications come from ``zoning_mods_<scenario>.csv`` in
    ``misc.data_dir()``.  Every key of ``settings["building_type_map"]``
    becomes a column that starts out as NaN.  Building types listed
    (whitespace separated) in a row's ``add_bldg`` cell are set to 1 and
    those listed in ``drop_bldg`` are then set to 0; cells that are not
    strings are ignored.  The result is left-merged onto the parcels
    geography frame on ``zoningmodcat``.

    Parameters
    ----------
    parcels_geography : table-like
        Must provide ``to_frame()``.
    scenario : str or int
        Interpolated into the csv file name.
    settings : mapping
        Must contain ``"building_type_map"``.

    Returns
    -------
    pandas.DataFrame
        Merged frame indexed by ``parcel_id``.
    """
    scenario_zoning = pd.read_csv(
        os.path.join(misc.data_dir(), 'zoning_mods_%s.csv' % scenario))

    # start every known building type as "unchanged" (NaN)
    for k in settings["building_type_map"]:
        scenario_zoning[k] = np.nan

    def add_drop_helper(col, val):
        # iterate with Series.items(); the older Series.iteritems() alias
        # no longer exists as of pandas 2.0
        for ind, item in scenario_zoning[col].items():
            if not isinstance(item, str):
                continue
            for btype in item.split():
                scenario_zoning.loc[ind, btype] = val

    # order matters: a type present in both lists ends up dropped (0)
    add_drop_helper("add_bldg", 1)
    add_drop_helper("drop_bldg", 0)

    return pd.merge(parcels_geography.to_frame().reset_index(),
                    scenario_zoning,
                    on=['zoningmodcat'],
                    how='left').set_index('parcel_id')
Esempio n. 57
0
def zoning_scenario(parcels_geography, scenario, policy, mapping):
    """Join scenario zoning modifications onto parcels by a policy-chosen key.

    * Scenarios "11", "12" and "15" that are not listed in
      ``policy["geographies_fr2_enable"]`` fall back to the scenario
      numbered ten lower.
    * ``zoning_mods_<scenario>.csv`` is read from ``misc.data_dir()``.
    * The orca injectable ``"ppa"`` is set to ``"are included"`` or
      ``"are not included"`` depending on whether the file has a
      ``ppa_id`` column.
    * Every key of ``mapping["building_type_map"]`` becomes a NaN column;
      types named in ``add_bldg`` are set to 1, then types named in
      ``drop_bldg`` are set to 0 (non-string cells are skipped).
    * The merge key is ``fbpzoningmodcat``, ``pba50zoningmodcat``,
      ``zoninghzcat`` or ``zoningmodcat``, checked in that order.

    Returns
    -------
    pandas.DataFrame
        Left merge of the parcels geography frame with the zoning
        modifications, indexed by ``parcel_id``.
    """
    # nested ifs keep the policy lookup from running for other scenarios
    if scenario in ("11", "12", "15"):
        if scenario not in policy["geographies_fr2_enable"]:
            scenario = str(int(scenario) - 10)

    mods_path = os.path.join(misc.data_dir(),
                             'zoning_mods_%s.csv' % scenario)
    zoning_mods = pd.read_csv(mods_path)

    has_ppa = "ppa_id" in zoning_mods.columns
    orca.add_injectable("ppa",
                        "are included" if has_ppa else "are not included")

    for bldg_type in mapping["building_type_map"].keys():
        zoning_mods[bldg_type] = np.nan

    # adds first, then drops, so a type listed in both ends up as 0
    for source_col, flag in (("add_bldg", 1), ("drop_bldg", 0)):
        for row_label, cell in zoning_mods[source_col].items():
            if isinstance(cell, str):
                for bldg_type in cell.split():
                    zoning_mods.loc[row_label, bldg_type] = flag

    def pick_join_col():
        # first matching rule wins; later policy keys are only consulted
        # when the earlier ones did not match
        if scenario in policy['geographies_fb_enable']:
            return 'fbpzoningmodcat'
        if scenario in policy['geographies_db_enable']:
            return 'pba50zoningmodcat'
        if 'zoninghzcat' in zoning_mods.columns:
            return 'zoninghzcat'
        return 'zoningmodcat'

    join_col = pick_join_col()

    parcel_geog = parcels_geography.to_frame().reset_index()
    merged = pd.merge(parcel_geog, zoning_mods, on=join_col, how='left')
    return merged.set_index('parcel_id')
Esempio n. 58
0
def taz2_forecast_inputs(regional_demographic_forecast):
    """Load TAZ2 forecast inputs and fill missing 2010 share columns.

    Reads ``taz2_forecast_inputs.csv`` from ``misc.data_dir()`` indexed by
    ``TAZ`` and replaces every ``'#DIV/0!'`` cell with NaN.  For each of
    ten share columns named ``<share>_2010``, null entries are replaced by
    the ``<share>`` value from the first ``year == 2010`` row of the
    regional forecast.  The ten columns are then cast to float.

    Per the original comments the share groups are: ``shrw*`` household
    shares, ``shra*`` persons by age category, and ``shrn``/``shry``
    households by presence of children.

    Parameters
    ----------
    regional_demographic_forecast : table-like
        ``to_frame()`` must provide ``year`` and the ten share columns.

    Returns
    -------
    pandas.DataFrame
    """
    csv_path = os.path.join(misc.data_dir(), "taz2_forecast_inputs.csv")
    t2fi = pd.read_csv(csv_path, index_col='TAZ').replace('#DIV/0!', np.nan)

    rdf = regional_demographic_forecast.to_frame()

    # regional column names; the TAZ-level column is the same name
    # with a "_2010" suffix
    shares = (
        'shrw0', 'shrw1', 'shrw2', 'shrw3',
        'shra1', 'shra2', 'shra3', 'shra4',
        'shrn', 'shry',
    )

    taz_cols = []
    for share in shares:
        taz_col = share + '_2010'
        regional_value = rdf.loc[rdf.year == 2010, share].values[0]
        t2fi.loc[t2fi[taz_col].isnull(), taz_col] = regional_value
        taz_cols.append(taz_col)

    t2fi[taz_cols] = t2fi[taz_cols].astype('float')
    return t2fi
Esempio n. 59
0
def taz2_forecast_inputs(regional_demographic_forecast):
    """Return TAZ2 forecast inputs with null 2010 shares set to regional values.

    ``taz2_forecast_inputs.csv`` is read from ``misc.data_dir()`` with
    ``TAZ`` as index and ``'#DIV/0!'`` cells converted to NaN.  Wherever
    one of the ten ``*_2010`` share columns is null, it receives the
    like-named (suffix-less) value from the first regional forecast row
    with ``year == 2010``.  Those columns are converted to float before
    the frame is returned.

    Parameters
    ----------
    regional_demographic_forecast : table-like
        ``to_frame()`` must provide ``year`` plus ``shrw0``-``shrw3``,
        ``shra1``-``shra4``, ``shrn`` and ``shry``.

    Returns
    -------
    pandas.DataFrame
    """
    t2fi = pd.read_csv(
        os.path.join(misc.data_dir(), "taz2_forecast_inputs.csv"),
        index_col='TAZ')
    t2fi = t2fi.replace('#DIV/0!', np.nan)

    rdf = regional_demographic_forecast.to_frame()

    regional_names = [
        # household shares (shrw0 .. shrw3)
        'shrw0', 'shrw1', 'shrw2', 'shrw3',
        # regional share of persons by age category
        'shra1', 'shra2', 'shra3', 'shra4',
        # regional share of hh by presence of children
        'shrn', 'shry',
    ]
    taz_names = [name + '_2010' for name in regional_names]

    for taz_name, regional_name in zip(taz_names, regional_names):
        fill_value = rdf.loc[rdf.year == 2010, regional_name].values[0]
        is_missing = t2fi[taz_name].isnull()
        t2fi.loc[is_missing, taz_name] = fill_value

    t2fi[taz_names] = t2fi[taz_names].astype('float')
    return t2fi
Esempio n. 60
0
def get_dev_projects_table(scenario, parcels):
    """Load development projects for a scenario, keyed to known parcels.

    Reads ``development_projects.csv`` from ``misc.data_dir()``, runs it
    through ``reprocess_dev_projects``, applies the optional per-scenario
    include column, and drops rows with a null ``geom_id``.  Rows whose
    ``geom_id`` does not occur in ``parcels.geom_id`` are reported on
    stdout and removed.  The remaining rows go through
    ``geom_id_to_parcel_id`` and the original ``geom_id`` values are put
    back as a column afterwards.

    Parameters
    ----------
    scenario : str
        If a column with this name exists, its values are cast to bool
        and used as the row filter.
    parcels : table-like
        Must expose a ``geom_id`` column; also forwarded to
        ``geom_id_to_parcel_id``.

    Returns
    -------
    pandas.DataFrame
        Helper output with its index reset and a ``geom_id`` column.
    """
    df = pd.read_csv(os.path.join(misc.data_dir(), "development_projects.csv"))
    df = reprocess_dev_projects(df)

    # this filters project by scenario
    if scenario in df:
        # df[scenario] is 1s and 0s indicating whether to include it
        df = df[df[scenario].astype('bool')]

    df = df.dropna(subset=['geom_id'])

    # compute the membership mask once; it drives both the warning and
    # the filter below
    on_known_parcel = df.geom_id.isin(parcels.geom_id)
    n_missing = int((~on_known_parcel).sum())
    if n_missing:
        # print() call form works on both Python 2 and Python 3
        print("%d MISSING GEOMIDS!" % n_missing)

    df = df[on_known_parcel]

    geom_id = df.geom_id  # save for later
    df = df.set_index("geom_id")
    df = geom_id_to_parcel_id(df, parcels).reset_index()  # use parcel id
    df["geom_id"] = geom_id.values  # add it back again cause it goes away

    return df