def list_tables():
    """
    Return a JSON response listing all tables registered with Orca.
    """
    return jsonify(tables=orca.list_tables())
def parcel_average_price(use, quantile=.5):
    """
    Return a per-parcel price series for the given use, taken from the
    'nodes' network aggregation and reindexed onto parcels.

    Parameters
    ----------
    use : str
        Column name in the 'nodes' table (e.g. "residential").
    quantile : float, optional
        Unused in this variant; kept for interface compatibility.

    Returns
    -------
    pandas.Series indexed by parcel.
    """
    # I'm testing out a zone aggregation rather than a network aggregation
    # because I want to be able to determine the quantile of the distribution
    # I also want more spreading in the development and not keep it localized

    # Guard FIRST: both branches below read the 'nodes' table, so without it
    # we return zeros instead of raising. (The original only guarded the
    # non-residential path, so use == "residential" crashed when 'nodes'
    # was not yet registered.)
    if 'nodes' not in orca.list_tables():
        return pd.Series(0, orca.get_table('parcels').index)

    if use == "residential":
        # get node price average and put it on parcels
        s = misc.reindex(
            orca.get_table('nodes')[use],
            orca.get_table('parcels').node_id) * 1.3

        # apply shifters
        cost_shifters = orca.get_table("parcels").cost_shifters
        price_shifters = orca.get_table("parcels").price_shifters
        s = s / cost_shifters * price_shifters

        # just to make sure we stay in a reasonable range
        s = s.fillna(0).clip(150, 1250)
        return s

    return misc.reindex(
        orca.get_table('nodes')[use],
        orca.get_table('parcels').node_id)
def parcel_average_price(use, quantile=.5):
    """
    Return a per-parcel price series for the given use from the 'nodes'
    table, computing network variables on demand if the column is missing.

    Parameters
    ----------
    use : str
        Column name in the 'nodes' table (e.g. "residential").
    quantile : float, optional
        Unused in this variant; kept for interface compatibility.

    Returns
    -------
    pandas.Series indexed by parcel; zeros if price data is unavailable.
    """
    # Converted Python 2 `print` statements to print() calls for
    # consistency with the rest of the file and Python 3 compatibility.
    if 'nodes' not in orca.list_tables():
        # just to keep from erroring
        print("WARNING: Using potentially broken function parcel_average_price")
        return pd.Series(0, orca.get_table('parcels').index)

    if use not in orca.get_table('nodes').columns:
        # try to build the missing price column before giving up
        orca.run(['neighborhood_vars', 'price_vars'])

    if use not in orca.get_table('nodes').columns:
        # just to keep from erroring
        print("WARNING: Using potentially broken function parcel_average_price")
        return pd.Series(0, orca.get_table('parcels').index)

    if use == "residential":
        # get node price average and put it on parcels
        col = misc.reindex(
            orca.get_table('nodes')[use],
            orca.get_table('parcels').node_id)

        # apply shifters
        cost_shifters = orca.get_table("parcels").cost_shifters
        price_shifters = orca.get_table("parcels").price_shifters
        col = col / cost_shifters * price_shifters

        # just to make sure we're in a reasonable range
        return col.fillna(0).clip(150, 1250)

    return misc.reindex(
        orca.get_table('nodes')[use],
        orca.get_table('parcels').node_id)
def orca_dataframe_tables():
    """
    Return a list of the names of all currently registered dataframe tables.
    """
    # Fixed docstring typo ("neames" -> "names").
    return [
        name for name in orca.list_tables()
        if orca.table_type(name) == 'dataframe'
    ]
def test_hdf(orca_session, data):
    """
    Test loading data from an HDF file.
    """
    step = LoadTable()
    step.table = 'buildings'
    step.source_type = 'hdf'
    step.path = 'data/buildings.hdf'

    assert 'buildings' not in orca.list_tables()

    modelmanager.register(step)
    assert 'buildings' in orca.list_tables()
    _ = orca.get_table('buildings').to_frame()

    modelmanager.initialize()
    assert 'buildings' in orca.list_tables()

    modelmanager.remove_step(step.name)
def validate_all_tables():
    """
    Validate all tables registered with Orca. See ``validate_table()`` above.

    Returns
    -------
    bool
    """
    # reciprocal=False avoids re-checking each table pair twice
    for table_name in orca.list_tables():
        validate_table(table_name, reciprocal=False)
def test_csv(orca_session, data):
    """
    Test loading data from a CSV file.
    """
    step = LoadTable()
    step.table = 'buildings'
    step.source_type = 'csv'
    step.path = 'data/buildings.csv'
    step.csv_index_cols = 'building_id'

    assert 'buildings' not in orca.list_tables()

    modelmanager.register(step)
    assert 'buildings' in orca.list_tables()
    _ = orca.get_table('buildings').to_frame()

    modelmanager.initialize()
    assert 'buildings' in orca.list_tables()

    modelmanager.remove_step(step.name)
def test_extra_settings(orca_session, data):
    """
    Test loading data with extra settings, e.g. for compressed files.
    """
    step = LoadTable()
    step.table = 'buildings'
    step.source_type = 'csv'
    step.path = 'data/buildings.csv.gz'
    step.csv_index_cols = 'building_id'
    step.extra_settings = {'compression': 'gzip'}

    assert 'buildings' not in orca.list_tables()

    modelmanager.register(step)
    assert 'buildings' in orca.list_tables()
    _ = orca.get_table('buildings').to_frame()

    modelmanager.initialize()
    assert 'buildings' in orca.list_tables()

    modelmanager.remove_step(step.name)
def schema():
    """
    All tables, columns, steps, injectables and broadcasts registered with
    Orca. Includes local columns on tables.
    """
    # Fixed docstring typo ("Inc.des" -> "Includes").
    tables = orca.list_tables()
    cols = {t: orca.get_table(t).columns for t in tables}
    steps = orca.list_steps()
    injectables = orca.list_injectables()
    broadcasts = orca.list_broadcasts()
    return jsonify(tables=tables, columns=cols, steps=steps,
                   injectables=injectables, broadcasts=broadcasts)
def schema():
    """
    All tables, columns, steps, injectables and broadcasts registered with
    Orca. Includes local columns on tables.
    """
    # Fixed docstring typo ("Inc.des" -> "Includes").
    tables = orca.list_tables()
    cols = {t: orca.get_table(t).columns for t in tables}
    steps = orca.list_steps()
    injectables = orca.list_injectables()
    broadcasts = orca.list_broadcasts()
    return jsonify(
        tables=tables, columns=cols, steps=steps,
        injectables=injectables, broadcasts=broadcasts)
def test_without_autorun(orca_session, data):
    """
    Confirm that disabling autorun works.
    """
    step = LoadTable()
    step.table = 'buildings'
    step.source_type = 'csv'
    step.path = 'data/buildings.csv'
    step.csv_index_cols = 'building_id'
    step.autorun = False

    modelmanager.register(step)
    # with autorun off, registering must NOT load the table
    assert 'buildings' not in orca.list_tables()

    modelmanager.remove_step(step.name)
def parcel_average_price(use, quantile=.5):
    """
    Per-parcel price for the given use. Residential prices come from a
    zonal quantile of existing building prices; other uses come from the
    'nodes' network aggregation.
    """
    # I'm testing out a zone aggregation rather than a network aggregation
    # because I want to be able to determine the quantile of the distribution
    # I also want more spreading in the development and not keep it so localized
    if use == "residential":
        buildings = orca.get_table('buildings')
        # NOTE(review): the .8 quantile is hard-coded here; the `quantile`
        # argument is ignored by this branch -- confirm whether intended.
        zonal_price = buildings.residential_price[
            buildings.general_type == "Residential"].\
            groupby(buildings.zone_id).quantile(.8)
        parcel_price = misc.reindex(
            zonal_price,
            orca.get_table('parcels').zone_id).clip(150, 1250)
        cost_shifters = orca.get_table("parcels").cost_shifters
        price_shifters = orca.get_table("parcels").price_shifters
        return parcel_price / cost_shifters * price_shifters

    if 'nodes' not in orca.list_tables():
        return pd.Series(0, orca.get_table('parcels').index)

    return misc.reindex(orca.get_table('nodes')[use],
                        orca.get_table('parcels').node_id)
def parcel_average_price(use, quantile=.5):
    """
    Per-parcel price for the given use, from the 'nodes' network
    aggregation, with cost/price/taz2 shifters applied for residential.
    """
    if use == "residential":
        # get node price average and put it on parcels
        node_price = misc.reindex(orca.get_table('nodes')[use],
                                  orca.get_table('parcels').node_id)

        # apply shifters
        parcels_wrapper = orca.get_table("parcels")
        shifted = (node_price / parcels_wrapper.cost_shifters *
                   parcels_wrapper.price_shifters *
                   parcels_wrapper.taz2_price_shifters)

        # just to make sure we're in a reasonable range
        return shifted.fillna(0).clip(150, 1250)

    if 'nodes' not in orca.list_tables():
        # just to keep from erroring
        return pd.Series(0, orca.get_table('parcels').index)

    return misc.reindex(orca.get_table('nodes')[use],
                        orca.get_table('parcels').node_id)
def parcel_average_price(use, quantile=.5):
    """
    Per-parcel price for the given use, from the 'nodes' network
    aggregation, with cost/price/taz2 shifters applied for residential.
    """
    if use == "residential":
        # put the node-level price average onto parcels
        prices = misc.reindex(orca.get_table('nodes')[use],
                              orca.get_table('parcels').node_id)
        cost_shifters = orca.get_table("parcels").cost_shifters
        price_shifters = orca.get_table("parcels").price_shifters
        taz2_shifters = orca.get_table("parcels").taz2_price_shifters
        prices = prices / cost_shifters * price_shifters * taz2_shifters
        # clamp into a reasonable range
        return prices.fillna(0).clip(150, 1250)

    if 'nodes' not in orca.list_tables():
        # just to keep from erroring
        return pd.Series(0, orca.get_table('parcels').index)

    return misc.reindex(orca.get_table('nodes')[use],
                        orca.get_table('parcels').node_id)
def parcel_average_price(use, quantile=.5):
    """
    Return a per-parcel price series for the given use, taken from the
    'nodes' network aggregation and reindexed onto parcels.

    Parameters
    ----------
    use : str
        Column name in the 'nodes' table (e.g. "residential").
    quantile : float, optional
        Unused in this variant; kept for interface compatibility.

    Returns
    -------
    pandas.Series indexed by parcel.
    """
    # I'm testing out a zone aggregation rather than a network aggregation
    # because I want to be able to determine the quantile of the distribution
    # I also want more spreading in the development and not keep it localized

    # Guard FIRST: both branches below read the 'nodes' table, so without it
    # we return zeros instead of raising. (The original only guarded the
    # non-residential path, so use == "residential" crashed when 'nodes'
    # was not yet registered.)
    if 'nodes' not in orca.list_tables():
        return pd.Series(0, orca.get_table('parcels').index)

    if use == "residential":
        # get node price average and put it on parcels
        s = misc.reindex(orca.get_table('nodes')[use],
                         orca.get_table('parcels').node_id) * 1.3

        # apply shifters
        cost_shifters = orca.get_table("parcels").cost_shifters
        price_shifters = orca.get_table("parcels").price_shifters
        s = s / cost_shifters * price_shifters

        # just to make sure we stay in a reasonable range
        s = s.fillna(0).clip(150, 1250)
        return s

    return misc.reindex(orca.get_table('nodes')[use],
                        orca.get_table('parcels').node_id)
def validate(self): """ Check some basic expectations about the table generated by the step: - Confirm that the table includes a unique, named index column (primary key) or set of columns (composite key). If not, raise a ValueError. - If the table contains columns whose names match the index columns of tables previously registered with Orca, check whether they make sense as join keys. Print a status message with the number of presumptive foreign-key values that are found in the primary key column. - Perform the same check for columns in previously registered tables whose names match the index of the table generated by this step. - It doesn't currently compare indexes to indexes. (Maybe it should?) Running this will trigger loading all registered Orca tables into memory, which may take a while if they have not yet been loaded. Stand-alone columns will not be loaded unless their names match an index column. Returns ------- bool """ # There are a couple of reasons we're not using the orca_test library here: # (a) orca_test doesn't currently support MultiIndexes, and (b) the primary-key/ # foreign-key comparisons aren't asserting anything, just printing status # messages. We should update orca_test to support both, probably. 
# Register table if needed if not orca.is_table(self.table): self.run() idx = orca.get_table(self.table).index # Check index has a name if list(idx.names) == [None]: raise ValueError("Index column has no name") # Check index is unique if len(idx.unique()) < len(idx): raise ValueError("Index not unique") # Compare columns to indexes of other tables, and vice versa combinations = [(self.table, t) for t in orca.list_tables() if self.table != t] \ + [(t, self.table) for t in orca.list_tables() if self.table != t] for t1, t2 in combinations: col_names = orca.get_table(t1).columns idx = orca.get_table(t2).index if set(idx.names).issubset(col_names): vals = orca.get_table(t1).to_frame(idx.names).drop_duplicates() # Easier to compare multi-column values to multi-column index if we # turn the values into an index as well vals = vals.reset_index().set_index(idx.names).index vals_in_idx = sum(vals.isin(idx)) if len(idx.names) == 1: idx_str = idx.names[0] else: idx_str = '[{}]'.format(','.join(idx.names)) print("'{}.{}': {} of {} unique values are found in '{}.{}' ({}%)"\ .format(t1, idx_str, vals_in_idx, len(vals), t2, idx_str, round(100*vals_in_idx/len(vals)))) return True
def run_developer(forms, agents, buildings, supply_fname, parcel_size,
                  ave_unit_size, total_units, feasibility, year=None,
                  target_vacancy=.1, form_to_btype_callback=None,
                  add_more_columns_callback=None, max_parcel_size=2000000,
                  residential=True, bldg_sqft_per_job=400.0,
                  min_unit_size=400, remove_developed_buildings=True,
                  unplace_agents=['households', 'jobs'],
                  num_units_to_build=None):
    """
    Run the developer model to pick and build buildings

    Parameters
    ----------
    forms : string or list of strings
        Passed directly dev.pick
    agents : DataFrame Wrapper
        Used to compute the current demand for units/floorspace in the area
    buildings : DataFrame Wrapper
        Used to compute the current supply of units/floorspace in the area
    supply_fname : string
        Identifies the column in buildings which indicates the supply of
        units/floorspace
    parcel_size : Series
        Passed directly to dev.pick
    ave_unit_size : Series
        Passed directly to dev.pick - average residential unit size
    total_units : Series
        Passed directly to dev.pick - total current residential_units /
        job_spaces
    feasibility : DataFrame Wrapper
        The output from feasibility above (the table called 'feasibility')
    year : int
        The year of the simulation - will be assigned to 'year_built' on the
        new buildings
    target_vacancy : float
        The target vacancy rate - used to determine how much to build
    form_to_btype_callback : function
        Will be used to convert the 'forms' in the pro forma to
        'building_type_id' in the larger model
    add_more_columns_callback : function
        Takes a dataframe and returns a dataframe - is used to make custom
        modifications to the new buildings that get added
    max_parcel_size : float
        Passed directly to dev.pick - max parcel size to consider
    min_unit_size : float
        Passed directly to dev.pick - min unit size that is valid
    residential : boolean
        Passed directly to dev.pick - switches between adding/computing
        residential_units and job_spaces
    bldg_sqft_per_job : float
        Passed directly to dev.pick - specifies the multiplier between
        floor spaces and job spaces for this form (does not vary by parcel
        as ave_unit_size does)
    remove_developed_buildings : optional, boolean (default True)
        Remove all buildings on the parcels which are being developed on
    unplace_agents : optional, list of strings (default ['households', 'jobs'])
        For all tables in the list, will look for field building_id and set
        it to -1 for buildings which are removed - only executed if
        remove_developed_buildings is true
    num_units_to_build : optional, int
        If num_units_to_build is passed, build this many units rather than
        computing it internally by using the length of agents and the sum
        of the relevant supply column - this trusts the caller to know how
        to compute this

    Returns
    -------
    Writes the result back to the buildings table and returns the new
    buildings with available debugging information on each new building
    """
    # NOTE: converted Python 2 `print` statements to print() calls; also
    # fixed docstring typos ("adn", "columin", and the parameter name
    # "remove_redeveloped_buildings" -> "remove_developed_buildings").
    dev = developer.Developer(feasibility.to_frame())

    target_units = num_units_to_build or dev.\
        compute_units_to_build(len(agents),
                               buildings[supply_fname].sum(),
                               target_vacancy)

    print("{:,} feasible buildings before running developer".format(
        len(dev.feasibility)))

    new_buildings = dev.pick(forms,
                             target_units,
                             parcel_size,
                             ave_unit_size,
                             total_units,
                             max_parcel_size=max_parcel_size,
                             min_unit_size=min_unit_size,
                             drop_after_build=True,
                             residential=residential,
                             bldg_sqft_per_job=bldg_sqft_per_job)

    orca.add_table("feasibility", dev.feasibility)

    if new_buildings is None:
        return

    if len(new_buildings) == 0:
        return new_buildings

    if year is not None:
        new_buildings["year_built"] = year

    if not isinstance(forms, list):
        # form gets set only if forms is a list
        new_buildings["form"] = forms

    if form_to_btype_callback is not None:
        new_buildings["building_type_id"] = new_buildings.\
            apply(form_to_btype_callback, axis=1)

    new_buildings["stories"] = new_buildings.stories.apply(np.ceil)

    # keep a handle on the pre-callback frame to return for debugging
    ret_buildings = new_buildings
    if add_more_columns_callback is not None:
        new_buildings = add_more_columns_callback(new_buildings)

    print("Adding {:,} buildings with {:,} {}".format(
        len(new_buildings),
        int(new_buildings[supply_fname].sum()),
        supply_fname))

    print("{:,} feasible buildings after running developer".format(
        len(dev.feasibility)))

    old_buildings = buildings.to_frame(buildings.local_columns)
    new_buildings = new_buildings[buildings.local_columns]

    if remove_developed_buildings:
        old_buildings = \
            _remove_developed_buildings(old_buildings, new_buildings,
                                        unplace_agents)

    all_buildings = dev.merge(old_buildings, new_buildings)

    orca.add_table("buildings", all_buildings)

    if "residential_units" in orca.list_tables() and residential:
        # need to add units to the units table as well
        old_units = orca.get_table("residential_units")
        old_units = old_units.to_frame(old_units.local_columns)

        new_units = pd.DataFrame({
            "unit_residential_price": 0,
            "num_units": 1,
            "deed_restricted": 0,
            "unit_num": np.concatenate([np.arange(i) for i in
                                        new_buildings.residential_units
                                        .values]),
            "building_id": np.repeat(new_buildings.index.values,
                                     new_buildings.residential_units.
                                     astype('int32').values)
        }).sort(columns=["building_id", "unit_num"]).reset_index(drop=True)
        # NOTE(review): DataFrame.sort(columns=...) is the legacy pandas API
        # (removed in modern pandas in favor of sort_values) -- left as-is
        # assuming the pinned pandas version still supports it; confirm.

        print("Adding {:,} units to the residential_units table".format(
            len(new_units)))

        all_units = dev.merge(old_units, new_units)
        all_units.index.name = "unit_id"

        orca.add_table("residential_units", all_units)

        return ret_buildings

        # pondered returning ret_buildings, new_units but users can get_table
        # the units if they want them - better to avoid breaking the api

    return ret_buildings
def validate_table(table, reciprocal=True):
    """
    Check some basic expectations about an Orca table:

    - Confirm that it includes a unique, named index column (a.k.a. primary key)
      or set of columns (multi-index, a.k.a. composite key). If not, raise a
      ValueError.

    - Confirm that none of the other columns in the table share names with the
      index(es). If they do, raise a ValueError.

    - If the table contains columns whose names match the index columns of other
      tables registered with Orca, check whether they make sense as join keys.
      This prints a status message with the number of presumptive foreign-key
      values that are found in the primary/composite key, for evaluation by the
      user.

    - Perform the same check for columns in _other_ tables whose names match the
      index column(s) of _this_ table.

    - It doesn't currently compare indexes to indexes. (Maybe it should?)

    Running this will trigger loading all registered Orca tables, which may take
    a while. Stand-alone columns will not be loaded unless their names match an
    index column.

    Doesn't currently incorporate ``orca_test`` validation, but it might be
    added.

    Parameters
    ----------
    table : str
        Name of Orca table to validate.

    reciprocal : bool, default True
        Whether to also check how columns of other tables align with this one's
        index. If False, only check this table's columns against other tables'
        indexes.

    Returns
    -------
    bool
    """
    # There are a couple of reasons we're not using the orca_test library here:
    # (a) orca_test doesn't currently support MultiIndexes, and (b) the primary-key/
    # foreign-key comparisons aren't asserting anything, just printing status
    # messages. We should update orca_test to support both, probably.

    if not orca.is_table(table):
        raise ValueError("Table not registered with Orca: '{}'".format(table))

    idx = orca.get_table(table).index

    # Check index has a name
    if list(idx.names) == [None]:
        raise ValueError("Index column has no name")

    # Check for unique column names
    for name in list(idx.names):
        if name in list(orca.get_table(table).columns):
            raise ValueError(
                "Index names and column names overlap: '{}'".format(name))

    # Check for unique index values
    if len(idx.unique()) < len(idx):
        raise ValueError("Index not unique")

    # Compare columns to indexes of other tables, and vice versa; each pair is
    # (table whose COLUMNS are checked, table whose INDEX is the candidate key)
    combinations = [(table, t) for t in orca.list_tables() if table != t]

    if reciprocal:
        combinations += [(t, table) for t in orca.list_tables() if table != t]

    for t1, t2 in combinations:
        col_names = orca.get_table(t1).columns
        idx = orca.get_table(t2).index

        if set(idx.names).issubset(col_names):
            vals = orca.get_table(t1).to_frame(idx.names).drop_duplicates()

            # Easier to compare multi-column values to multi-column index if we
            # turn the values into an index as well
            vals = vals.reset_index().set_index(idx.names).index
            vals_in_idx = sum(vals.isin(idx))

            if len(idx.names) == 1:
                idx_str = idx.names[0]
            else:
                idx_str = '[{}]'.format(','.join(idx.names))

            # status message only -- nothing is asserted about the match rate
            print("'{}.{}': {} of {} unique values are found in '{}.{}' ({}%)"\
                .format(t1, idx_str, vals_in_idx, len(vals),
                        t2, idx_str, round(100*vals_in_idx/len(vals))))

    return True
def tables_in_base_year():
    """
    Return the names of registered Orca tables that are present in the
    base-year HDF5 store, either directly or via the
    'store_table_names_dict' injectable mapping.

    Returns
    -------
    list of str
    """
    store_path = os.path.join(
        misc.data_dir(),
        yamlio.yaml_to_dict(
            str_or_buffer=os.path.join(misc.configs_dir(),
                                       "settings.yaml"))['store'])
    store_table_names = orca.get_injectable('store_table_names_dict')
    # Use a context manager so the read-only store handle is always closed
    # (the original opened the HDFStore and never closed it).
    with pd.HDFStore(store_path, mode="r") as h5store:
        return [t for t in orca.list_tables()
                if t in h5store or
                store_table_names.get(t, None) in h5store]
def test_table_list(expected_tables):
    """Registered tables should exactly match the expected list."""
    registered = orca.list_tables()
    print(registered)
    assert registered == expected_tables
def run_developer(forms, agents, buildings, supply_fname, parcel_size,
                  ave_unit_size, total_units, feasibility, max_dua_zoning,
                  max_res_units, addl_units, year=None, target_vacancy=.1,
                  use_max_res_units=False,
                  form_to_btype_callback=None, add_more_columns_callback=None,
                  max_parcel_size=2000000, residential=True,
                  bldg_sqft_per_job=400.0, min_unit_size=400,
                  remove_developed_buildings=True,
                  unplace_agents=['households', 'jobs'],
                  num_units_to_build=None, profit_to_prob_func=None):
    """
    Run the developer model to pick and build buildings

    Parameters
    ----------
    forms : string or list of strings
        Passed directly dev.pick
    agents : DataFrame Wrapper
        Used to compute the current demand for units/floorspace in the area
    buildings : DataFrame Wrapper
        Used to compute the current supply of units/floorspace in the area
    supply_fname : string
        Identifies the column in buildings which indicates the supply of
        units/floorspace
    parcel_size : Series
        Passed directly to dev.pick
    ave_unit_size : Series
        Passed directly to dev.pick - average residential unit size
    total_units : Series
        Passed directly to dev.pick - total current residential_units /
        job_spaces
    feasibility : DataFrame Wrapper
        The output from feasibility above (the table called 'feasibility')
    max_dua_zoning : Series
        Max dwelling units per acre allowed by zoning, per parcel
        (NOTE(review): inferred from usage below - confirm)
    max_res_units : Series
        Max residential units allowed by zoning, per parcel
        (NOTE(review): inferred from usage below - confirm)
    addl_units : Series
        Additional units allowed, per parcel
        (NOTE(review): inferred from usage below - confirm)
    year : int
        The year of the simulation - will be assigned to 'year_built' on the
        new buildings
    target_vacancy : float
        The target vacancy rate - used to determine how much to build
    use_max_res_units : boolean
        NOTE(review): not referenced anywhere in this body - confirm whether
        it should gate the commented-out units_from_zoning logic below
    form_to_btype_callback : function
        Will be used to convert the 'forms' in the pro forma to
        'building_type_id' in the larger model
    add_more_columns_callback : function
        Takes a dataframe and returns a dataframe - is used to make custom
        modifications to the new buildings that get added
    max_parcel_size : float
        Passed directly to dev.pick - max parcel size to consider
    min_unit_size : float
        Passed directly to dev.pick - min unit size that is valid
    residential : boolean
        Passed directly to dev.pick - switches between adding/computing
        residential_units and job_spaces
    bldg_sqft_per_job : float
        Passed directly to dev.pick - specified the multiplier between
        floor spaces and job spaces for this form (does not vary by parcel
        as ave_unit_size does)
    remove_redeveloped_buildings : optional, boolean (default True)
        Remove all buildings on the parcels which are being developed on
    unplace_agents : optional , list of strings (default ['households', 'jobs'])
        For all tables in the list, will look for field building_id and set
        it to -1 for buildings which are removed - only executed if
        remove_developed_buildings is true
    num_units_to_build: optional, int
        If num_units_to_build is passed, build this many units rather than
        computing it internally by using the length of agents adn the sum of
        the relevant supply columin - this trusts the caller to know how to
        compute this.
    profit_to_prob_func: func
        Passed directly to dev.pick

    Returns
    -------
    Writes the result back to the buildings table and returns the new
    buildings with available debugging information on each new building
    """
    # num_units_to_build = target_units_def()
    # ave_unit_size = target_avg_unit_size()
    dev = developer.Developer(feasibility.to_frame())

    target_units = num_units_to_build or dev.\
        compute_units_to_build(len(agents),
                               buildings[supply_fname].sum(),
                               target_vacancy)

    print "{:,} feasible buildings before running developer".format(
        len(dev.feasibility))

    #df = dev.feasibility['residential']
    df = dev.feasibility

    # Preserve the original profit, then floor negative profits at a small
    # positive value so those parcels stay pickable downstream
    df['residential','max_profit_orig'] = df['residential','max_profit']
    df['residential', 'max_profit'].loc[
        df['residential','max_profit_orig'] < 0] = .001
    orca.add_table("feasibility", df)

    parcels = orca.get_table('parcels').to_frame()

    # Work on the residential slice only from here on
    df = df['residential']
    settings = orca.get_injectable('settings')

    # Attach per-parcel inputs and settings needed for unit calculations
    df["parcel_size"] = parcel_size
    df["ave_unit_size"] = ave_unit_size
    df['current_units'] = total_units
    df['max_dua_zoning'] = max_dua_zoning
    df['max_res_units'] = max_res_units
    df['addl_units'] = addl_units
    df['zoning_id'] = parcels.zoning_id
    df['siteid'] = parcels.siteid
    df['zoning_schedule_id'] = parcels.zoning_schedule_id
    df['acres'] = parcels.parcel_acres
    df['land_cost_per_sqft'] = settings['default_land_cost']
    df['cap_rate'] = settings['sqftproforma_config']['cap_rate']
    df['building_efficiency'] = \
        settings['sqftproforma_config']['building_efficiency']
    df['min_size_per_unit'] = min_unit_size
    df['max_dua_from_zoning'] = df['max_dua_zoning']
    df['development_type_id'] = parcels.development_type_id

    # Drop parcels that are too large to consider
    df = df[df.parcel_size < max_parcel_size]

    '''
    df['units_from_max_dua_zoning'] = np.NaN
    df.loc[df['max_dua_from_zoning'] >= 0, 'units_from_max_dua_zoning'] = \
        (df.max_dua_from_zoning * df.acres).round()
    df['units_from_max_res_zoning'] = df['max_res_units']
    df['units_from_zoning'] = np.NaN
    # final units from zoning
    df.loc[(df['units_from_max_res_zoning'] >= 0) &
           (df['units_from_max_dua_zoning'].isnull()),
           'units_from_zoning'] = df['units_from_max_res_zoning']
    df.loc[(df['units_from_max_res_zoning'].isnull()) &
           (df['units_from_max_dua_zoning'] >= 0),
           'units_from_zoning'] = df['units_from_max_dua_zoning']
    df.loc[(df['units_from_max_res_zoning'].isnull()) &
           (df['units_from_max_dua_zoning'].isnull()),
           'units_from_zoning'] = 0
    df.loc[(df['units_from_max_res_zoning'] >= 0) &
           (df['units_from_max_dua_zoning'] >= 0),
           'units_from_zoning'] = df[
        ['units_from_max_res_zoning', 'units_from_max_dua_zoning']].min(axis=1)
    '''

    ###########################################################################
    # for schedule 2 ONLY
    df['units_from_min_unit_size'] = \
        (df['residential_sqft'] / min_unit_size).round()
    # df.loc[(df['units_from_max_res_zoning'].isnull()), 'units_from_zoning'] = 0
    # end for schedule 2 ONLY
    ###########################################################################

    # Scheduled-development sites (siteid > 0) get no zoning-based units
    df.loc[(df['siteid'] > 0), 'units_from_zoning'] = 0

    # Constrain units by the minimum unit size, then recompute the implied
    # unit size and final unit count
    df['final_units_constrained_by_size'] = \
        df[['addl_units', 'units_from_min_unit_size']].min(axis=1)
    df['unit_size_from_final'] = \
        df['residential_sqft'] / df['units_from_zoning']
    df.loc[(df['unit_size_from_final'] < min_unit_size),
           'unit_size_from_final'] = min_unit_size
    df['final_units_constrained_by_size'] = \
        (df['residential_sqft'] / df['unit_size_from_final']).round()
    df['net_units'] = df.final_units_constrained_by_size
    df['roi'] = df['max_profit'] / df['total_cost']

    df = df.reset_index(drop=False)
    df = df.set_index(['parcel_id'])
    unit_size_from_final = df.unit_size_from_final

    new_buildings = dev.pick(forms,
                             target_units,
                             parcel_size,
                             unit_size_from_final,
                             total_units,
                             max_parcel_size=max_parcel_size,
                             min_unit_size=min_unit_size,
                             drop_after_build=False,
                             residential=residential,
                             bldg_sqft_per_job=bldg_sqft_per_job,
                             profit_to_prob_func=profit_to_prob_func)

    orca.add_table("feasibility", dev.feasibility)

    if new_buildings is None:
        return

    if len(new_buildings) == 0:
        return new_buildings

    if year is not None:
        new_buildings["year_built"] = year

    if not isinstance(forms, list):
        # form gets set only if forms is a list
        new_buildings["form"] = forms

    if form_to_btype_callback is not None:
        new_buildings["building_type_id"] = new_buildings.\
            apply(form_to_btype_callback, axis=1)

    new_buildings["stories"] = new_buildings.stories.apply(np.ceil)

    # keep a handle on the pre-callback frame to return for debugging
    ret_buildings = new_buildings
    if add_more_columns_callback is not None:
        new_buildings = add_more_columns_callback(new_buildings)

    print "Adding {:,} buildings with {:,} {}".\
        format(len(new_buildings),
               int(new_buildings['net_units'].sum()),
               supply_fname)
    print "{:,} feasible buildings after running developer".format(
        len(dev.feasibility))

    old_buildings = buildings.to_frame(buildings.local_columns)
    new_buildings = new_buildings[buildings.local_columns]
    new_buildings['new_bldg'] = True
    new_buildings['sch_dev'] = False
    new_buildings['new_units'] = new_buildings['residential_units']

    if remove_developed_buildings:
        old_buildings = \
            utils._remove_developed_buildings(old_buildings, new_buildings,
                                              unplace_agents)

    all_buildings, new_index = dev.merge(old_buildings, new_buildings,
                                         return_index=True)
    # re-key the returned frame to the merged building ids
    ret_buildings.index = new_index

    orca.add_table("buildings", all_buildings)

    if "residential_units" in orca.list_tables() and residential:
        # need to add units to the units table as well
        old_units = orca.get_table("residential_units")
        old_units = old_units.to_frame(old_units.local_columns)

        new_units = pd.DataFrame({
            "unit_residential_price": 0,
            "num_units": 1,
            "deed_restricted": 0,
            "unit_num": np.concatenate([np.arange(i) for i in
                                        new_buildings.residential_units
                                        .values]),
            "building_id": np.repeat(new_buildings.index.values,
                                     new_buildings.residential_units.
                                     astype('int32').values)
        }).sort(columns=["building_id", "unit_num"]).reset_index(drop=True)

        print "Adding {:,} units to the residential_units table".\
            format(len(new_units))

        all_units = dev.merge(old_units, new_units)
        all_units.index.name = "unit_id"

        orca.add_table("residential_units", all_units)

        return ret_buildings

        # pondered returning ret_buildings, new_units but users can get_table
        # the units if they want them - better to avoid breaking the api

    return ret_buildings