Exemple #1
0
def list_tables():
    """
    Report the names of every table registered with Orca.

    The names come from ``orca.list_tables()`` and are handed to ``jsonify``
    under the ``tables`` key.

    """
    return jsonify(tables=orca.list_tables())
Exemple #2
0
def list_tables():
    """
    Report the names of every table registered with Orca.

    The names come from ``orca.list_tables()`` and are handed to ``jsonify``
    under the ``tables`` key.

    """
    registered = orca.list_tables()
    response = jsonify(tables=registered)
    return response
Exemple #3
0
def parcel_average_price(use, quantile=.5):
    """
    Put the node-level value for ``use`` onto parcels.

    Parameters
    ----------
    use : str
        Column to read from the ``nodes`` table.
    quantile : float, optional
        Not used by this implementation.

    Returns
    -------
    For ``use == "residential"``: the reindexed node column scaled by 1.3,
    divided by the parcels' ``cost_shifters``, multiplied by their
    ``price_shifters``, with NaNs set to 0 and values clipped to
    [150, 1250]. For any other use: a zero Series on the parcels index when
    no ``nodes`` table is registered, otherwise the reindexed node column.
    """
    if use != "residential":
        if 'nodes' not in orca.list_tables():
            return pd.Series(0, orca.get_table('parcels').index)

        node_values = orca.get_table('nodes')[use]
        parcel_nodes = orca.get_table('parcels').node_id
        return misc.reindex(node_values, parcel_nodes)

    # residential: node price placed on parcels, scaled up by 30%
    node_values = orca.get_table('nodes')[use]
    parcel_nodes = orca.get_table('parcels').node_id
    price = misc.reindex(node_values, parcel_nodes) * 1.3

    # parcel-level shifters
    cost = orca.get_table("parcels").cost_shifters
    revenue = orca.get_table("parcels").price_shifters
    price = price / cost * revenue

    # keep the result inside a fixed range
    return price.fillna(0).clip(150, 1250)
def parcel_average_price(use, quantile=.5):
    """
    Put the node-level value for ``use`` onto parcels.

    If the ``nodes`` table is not registered, or it still lacks the ``use``
    column after running the ``neighborhood_vars`` and ``price_vars`` steps,
    a warning is printed and a zero Series on the parcels index is returned.

    Parameters
    ----------
    use : str
        Column to read from the ``nodes`` table.
    quantile : float, optional
        Not used by this implementation.

    Returns
    -------
    For ``use == "residential"``: the reindexed node column divided by the
    parcels' ``cost_shifters``, multiplied by their ``price_shifters``, with
    NaNs set to 0 and values clipped to [150, 1250]. For any other use: the
    reindexed node column unchanged.
    """
    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3
    warning = ("WARNING: Using potentially broken function "
               "parcel_average_price")

    if 'nodes' not in orca.list_tables():
        # just to keep from erroring
        print(warning)
        return pd.Series(0, orca.get_table('parcels').index)

    if use not in orca.get_table('nodes').columns:
        # try to generate the missing column before giving up
        orca.run(['neighborhood_vars', 'price_vars'])
        if use not in orca.get_table('nodes').columns:
            # just to keep from erroring
            print(warning)
            return pd.Series(0, orca.get_table('parcels').index)

    if use == "residential":
        # get node price average and put it on parcels
        col = misc.reindex(
            orca.get_table('nodes')[use],
            orca.get_table('parcels').node_id)

        # apply shifters
        cost_shifters = orca.get_table("parcels").cost_shifters
        price_shifters = orca.get_table("parcels").price_shifters
        col = col / cost_shifters * price_shifters

        # just to make sure we're in a reasonable range
        return col.fillna(0).clip(150, 1250)

    return misc.reindex(
        orca.get_table('nodes')[use],
        orca.get_table('parcels').node_id)
Exemple #5
0
def orca_dataframe_tables():
    """
    Return a list of the names of all currently registered tables whose
    Orca table type is 'dataframe'
    """
    registered = orca.list_tables()
    return [tbl for tbl in registered if orca.table_type(tbl) == 'dataframe']
def test_hdf(orca_session, data):
    """
    Check that a ``LoadTable`` step pointing at an HDF file registers its
    table with Orca.

    """
    step = LoadTable()
    step.table = 'buildings'
    step.source_type = 'hdf'
    step.path = 'data/buildings.hdf'

    # nothing should be registered before the step is
    assert 'buildings' not in orca.list_tables()

    modelmanager.register(step)
    assert 'buildings' in orca.list_tables()
    # materialize the table to confirm it can actually be read
    orca.get_table('buildings').to_frame()

    # the table should still be there after re-initializing
    modelmanager.initialize()
    assert 'buildings' in orca.list_tables()

    modelmanager.remove_step(step.name)
def validate_all_tables():
    """
    Validate all tables registered with Orca. See ``validate_table()``.

    Every registered table is passed to ``validate_table()`` with
    ``reciprocal=False``: because all tables are visited, each table's
    columns are still compared against every other table's index.

    Returns
    -------
    bool
        True once every table has been validated. Any exception raised by
        ``validate_table()`` propagates to the caller.

    """
    for t in orca.list_tables():
        validate_table(t, reciprocal=False)

    # the documented return type is bool; previously this fell off the end
    # and returned None
    return True
def test_csv(orca_session, data):
    """
    Check that a ``LoadTable`` step pointing at a CSV file registers its
    table with Orca.

    """
    step = LoadTable()
    step.table = 'buildings'
    step.source_type = 'csv'
    step.path = 'data/buildings.csv'
    step.csv_index_cols = 'building_id'

    # nothing should be registered before the step is
    assert 'buildings' not in orca.list_tables()

    modelmanager.register(step)
    assert 'buildings' in orca.list_tables()
    # materialize the table to confirm it can actually be read
    orca.get_table('buildings').to_frame()

    # the table should still be there after re-initializing
    modelmanager.initialize()
    assert 'buildings' in orca.list_tables()

    modelmanager.remove_step(step.name)
def test_extra_settings(orca_session, data):
    """
    Check that ``extra_settings`` can be supplied to a ``LoadTable`` step,
    here a gzip compression setting for a compressed CSV file.

    """
    step = LoadTable()
    step.table = 'buildings'
    step.source_type = 'csv'
    step.path = 'data/buildings.csv.gz'
    step.csv_index_cols = 'building_id'
    step.extra_settings = {'compression': 'gzip'}

    # nothing should be registered before the step is
    assert 'buildings' not in orca.list_tables()

    modelmanager.register(step)
    assert 'buildings' in orca.list_tables()
    # materialize the table to confirm it can actually be read
    orca.get_table('buildings').to_frame()

    # the table should still be there after re-initializing
    modelmanager.initialize()
    assert 'buildings' in orca.list_tables()

    modelmanager.remove_step(step.name)
Exemple #10
0
def schema():
    """
    All tables, columns, steps, injectables and broadcasts registered with
    Orca. Includes local columns on tables.

    """
    table_names = orca.list_tables()

    # column listing per table, keyed by table name
    columns_by_table = {}
    for name in table_names:
        columns_by_table[name] = orca.get_table(name).columns

    return jsonify(
        tables=table_names,
        columns=columns_by_table,
        steps=orca.list_steps(),
        injectables=orca.list_injectables(),
        broadcasts=orca.list_broadcasts())
Exemple #11
0
def schema():
    """
    All tables, columns, steps, injectables and broadcasts registered with
    Orca. Includes local columns on tables.

    """
    table_names = orca.list_tables()

    # column listing per table, keyed by table name
    columns_by_table = {}
    for name in table_names:
        columns_by_table[name] = orca.get_table(name).columns

    payload = dict(
        tables=table_names,
        columns=columns_by_table,
        steps=orca.list_steps(),
        injectables=orca.list_injectables(),
        broadcasts=orca.list_broadcasts())

    return jsonify(**payload)
def test_without_autorun(orca_session, data):
    """
    Check that registering a step with ``autorun = False`` does not register
    its table with Orca.

    """
    step = LoadTable()
    step.table = 'buildings'
    step.source_type = 'csv'
    step.path = 'data/buildings.csv'
    step.csv_index_cols = 'building_id'
    step.autorun = False

    modelmanager.register(step)
    # with autorun disabled the table must not show up yet
    assert 'buildings' not in orca.list_tables()

    modelmanager.remove_step(step.name)
def parcel_average_price(use, quantile=.5):
    """
    Return a per-parcel price for ``use``.

    Residential prices use a zone aggregation of building prices rather than
    a network aggregation, so that a quantile of the distribution can be
    taken and development spreads out instead of staying localized.

    Parameters
    ----------
    use : str
        "residential", or a column name of the ``nodes`` table.
    quantile : float, optional
        Not used by this implementation; the residential branch always takes
        the 0.8 quantile.

    Returns
    -------
    For ``use == "residential"``: the 0.8 quantile of ``residential_price``
    over buildings whose ``general_type`` is "Residential", grouped by
    ``zone_id``, reindexed by the parcels' ``zone_id``, clipped to
    [150, 1250], then divided by ``cost_shifters`` and multiplied by
    ``price_shifters``. For any other use: a zero Series on the parcels
    index when no ``nodes`` table is registered, otherwise the reindexed
    node column.
    """
    if use != "residential":
        if 'nodes' not in orca.list_tables():
            return pd.Series(0, orca.get_table('parcels').index)

        node_values = orca.get_table('nodes')[use]
        parcel_nodes = orca.get_table('parcels').node_id
        return misc.reindex(node_values, parcel_nodes)

    buildings = orca.get_table('buildings')
    res_price = buildings.residential_price
    is_residential = buildings.general_type == "Residential"
    zone_price = res_price[is_residential].\
        groupby(buildings.zone_id).quantile(.8)

    parcel_zones = orca.get_table('parcels').zone_id
    price = misc.reindex(zone_price, parcel_zones).clip(150, 1250)

    cost = orca.get_table("parcels").cost_shifters
    revenue = orca.get_table("parcels").price_shifters
    return price / cost * revenue
Exemple #14
0
def parcel_average_price(use, quantile=.5):
    """
    Put the node-level value for ``use`` onto parcels.

    Parameters
    ----------
    use : str
        Column to read from the ``nodes`` table.
    quantile : float, optional
        Not used by this implementation.

    Returns
    -------
    For ``use == "residential"``: the reindexed node column divided by the
    parcels' ``cost_shifters`` and multiplied by their ``price_shifters``
    and ``taz2_price_shifters``, with NaNs set to 0 and values clipped to
    [150, 1250]. For any other use: a zero Series on the parcels index when
    no ``nodes`` table is registered, otherwise the reindexed node column.
    """
    if use != "residential":
        if 'nodes' not in orca.list_tables():
            # fall back to zeros rather than raising
            return pd.Series(0, orca.get_table('parcels').index)

        node_values = orca.get_table('nodes')[use]
        parcel_nodes = orca.get_table('parcels').node_id
        return misc.reindex(node_values, parcel_nodes)

    # node price average placed on parcels
    node_values = orca.get_table('nodes')[use]
    parcel_nodes = orca.get_table('parcels').node_id
    price = misc.reindex(node_values, parcel_nodes)

    # parcel-level shifters
    cost = orca.get_table("parcels").cost_shifters
    revenue = orca.get_table("parcels").price_shifters
    taz2 = orca.get_table("parcels").taz2_price_shifters
    price = price / cost * revenue * taz2

    # keep the result inside a fixed range
    return price.fillna(0).clip(150, 1250)
Exemple #15
0
def parcel_average_price(use, quantile=.5):
    """
    Put the node-level value for ``use`` onto parcels.

    Parameters
    ----------
    use : str
        Column to read from the ``nodes`` table.
    quantile : float, optional
        Not used by this implementation.

    Returns
    -------
    For ``use == "residential"``: the reindexed node column divided by the
    parcels' ``cost_shifters`` and multiplied by their ``price_shifters``
    and ``taz2_price_shifters``, with NaNs set to 0 and values clipped to
    [150, 1250]. For any other use: a zero Series on the parcels index when
    no ``nodes`` table is registered, otherwise the reindexed node column.
    """
    if use != "residential":
        if 'nodes' not in orca.list_tables():
            # fall back to zeros rather than raising
            return pd.Series(0, orca.get_table('parcels').index)

        node_values = orca.get_table('nodes')[use]
        parcel_nodes = orca.get_table('parcels').node_id
        return misc.reindex(node_values, parcel_nodes)

    # node price average placed on parcels
    node_values = orca.get_table('nodes')[use]
    parcel_nodes = orca.get_table('parcels').node_id
    price = misc.reindex(node_values, parcel_nodes)

    # parcel-level shifters
    cost = orca.get_table("parcels").cost_shifters
    revenue = orca.get_table("parcels").price_shifters
    taz2 = orca.get_table("parcels").taz2_price_shifters

    adjusted = price / cost * revenue * taz2

    # keep the result inside a fixed range
    return adjusted.fillna(0).clip(150, 1250)
def parcel_average_price(use, quantile=.5):
    """
    Put the node-level value for ``use`` onto parcels.

    Parameters
    ----------
    use : str
        Column to read from the ``nodes`` table.
    quantile : float, optional
        Not used by this implementation.

    Returns
    -------
    For ``use == "residential"``: the reindexed node column scaled by 1.3,
    divided by the parcels' ``cost_shifters``, multiplied by their
    ``price_shifters``, with NaNs set to 0 and values clipped to
    [150, 1250]. For any other use: a zero Series on the parcels index when
    no ``nodes`` table is registered, otherwise the reindexed node column.
    """
    if use != "residential":
        if 'nodes' not in orca.list_tables():
            return pd.Series(0, orca.get_table('parcels').index)

        node_values = orca.get_table('nodes')[use]
        parcel_nodes = orca.get_table('parcels').node_id
        return misc.reindex(node_values, parcel_nodes)

    # residential: node price placed on parcels, scaled up by 30%
    node_values = orca.get_table('nodes')[use]
    parcel_nodes = orca.get_table('parcels').node_id
    price = misc.reindex(node_values, parcel_nodes) * 1.3

    # parcel-level shifters
    cost = orca.get_table("parcels").cost_shifters
    revenue = orca.get_table("parcels").price_shifters
    price = price / cost * revenue

    # keep the result inside a fixed range
    return price.fillna(0).clip(150, 1250)
Exemple #17
0
    def validate(self):
        """
        Check some basic expectations about the table generated by the step:

        - Confirm that the table includes a unique, named index column (primary
          key) or set of columns (composite key). If not, raise a ValueError.

        - If the table contains columns whose names match the index columns of
          tables previously registered with Orca, check whether they make sense
          as join keys. Print a status message with the number of presumptive
          foreign-key values that are found in the primary key column.

        - Perform the same check for columns in previously registered tables
          whose names match the index of the table generated by this step.

        - It doesn't currently compare indexes to indexes. (Maybe it should?)

        Running this will trigger loading all registered Orca tables into
        memory, which may take a while if they have not yet been loaded.
        Stand-alone columns will not be loaded unless their names match an index
        column.

        Returns
        -------
        bool
            Always True; problems are reported by raising.

        Raises
        ------
        ValueError
            If the index has no name or its values are not unique.

        """
        # There are a couple of reasons we're not using the orca_test library here:
        # (a) orca_test doesn't currently support MultiIndexes, and (b) the primary-key/
        # foreign-key comparisons aren't asserting anything, just printing status
        # messages. We should update orca_test to support both, probably.

        # Register table if needed
        if not orca.is_table(self.table):
            self.run()

        idx = orca.get_table(self.table).index

        # Check index has a name
        if list(idx.names) == [None]:
            raise ValueError("Index column has no name")

        # Check index is unique
        if len(idx.unique()) < len(idx):
            raise ValueError("Index not unique")

        # Compare columns to indexes of other tables, and vice versa
        others = [t for t in orca.list_tables() if self.table != t]
        combinations = [(self.table, t) for t in others] \
            + [(t, self.table) for t in others]

        for t1, t2 in combinations:
            col_names = orca.get_table(t1).columns
            idx = orca.get_table(t2).index

            if not set(idx.names).issubset(col_names):
                continue

            vals = orca.get_table(t1).to_frame(idx.names).drop_duplicates()

            # Easier to compare multi-column values to multi-column index if we
            # turn the values into an index as well
            vals = vals.reset_index().set_index(idx.names).index
            n_vals = len(vals)

            if n_vals:
                vals_in_idx = vals.isin(idx).sum()
                pct = round(100*vals_in_idx/n_vals)
            else:
                # An empty table has nothing to match; report 0% instead of
                # failing with a ZeroDivisionError
                vals_in_idx = 0
                pct = 0

            if len(idx.names) == 1:
                idx_str = idx.names[0]
            else:
                idx_str = '[{}]'.format(','.join(str(n) for n in idx.names))

            print("'{}.{}': {} of {} unique values are found in '{}.{}' ({}%)"
                  .format(t1, idx_str,
                          vals_in_idx, n_vals,
                          t2, idx_str,
                          pct))

        return True
Exemple #18
0
def run_developer(forms, agents, buildings, supply_fname, parcel_size,
                  ave_unit_size, total_units, feasibility, year=None,
                  target_vacancy=.1, form_to_btype_callback=None,
                  add_more_columns_callback=None, max_parcel_size=2000000,
                  residential=True, bldg_sqft_per_job=400.0,
                  min_unit_size=400, remove_developed_buildings=True,
                  unplace_agents=['households', 'jobs'],
                  num_units_to_build=None):
    """
    Run the developer model to pick and build buildings

    Parameters
    ----------
    forms : string or list of strings
        Passed directly to dev.pick
    agents : DataFrame Wrapper
        Used to compute the current demand for units/floorspace in the area
    buildings : DataFrame Wrapper
        Used to compute the current supply of units/floorspace in the area
    supply_fname : string
        Identifies the column in buildings which indicates the supply of
        units/floorspace
    parcel_size : Series
        Passed directly to dev.pick
    ave_unit_size : Series
        Passed directly to dev.pick - average residential unit size
    total_units : Series
        Passed directly to dev.pick - total current residential_units /
        job_spaces
    feasibility : DataFrame Wrapper
        The output from feasibility (the table called 'feasibility')
    year : int
        The year of the simulation - will be assigned to 'year_built' on the
        new buildings
    target_vacancy : float
        The target vacancy rate - used to determine how much to build
    form_to_btype_callback : function
        Will be used to convert the 'forms' in the pro forma to
        'building_type_id' in the larger model
    add_more_columns_callback : function
        Takes a dataframe and returns a dataframe - is used to make custom
        modifications to the new buildings that get added
    max_parcel_size : float
        Passed directly to dev.pick - max parcel size to consider
    min_unit_size : float
        Passed directly to dev.pick - min unit size that is valid
    residential : boolean
        Passed directly to dev.pick - switches between adding/computing
        residential_units and job_spaces
    bldg_sqft_per_job : float
        Passed directly to dev.pick - specifies the multiplier between
        floor spaces and job spaces for this form (does not vary by parcel
        as ave_unit_size does)
    remove_developed_buildings : optional, boolean (default True)
        Remove all buildings on the parcels which are being developed on
    unplace_agents : optional, list of strings (default ['households', 'jobs'])
        For all tables in the list, will look for field building_id and set
        it to -1 for buildings which are removed - only executed if
        remove_developed_buildings is true
    num_units_to_build : optional, int
        If num_units_to_build is passed, build this many units rather than
        computing it internally by using the length of agents and the sum of
        the relevant supply column - this trusts the caller to know how to
        compute this. A falsy value (None or 0) falls back to the internal
        computation.

    Returns
    -------
    Writes the result back to the buildings table (and re-registers the
    'feasibility' table) and returns the new buildings with available
    debugging information on each new building. Returns None when dev.pick
    returns None, and the empty result when dev.pick picks no buildings.
    """

    dev = developer.Developer(feasibility.to_frame())

    target_units = num_units_to_build or dev.compute_units_to_build(
        len(agents),
        buildings[supply_fname].sum(),
        target_vacancy)

    # print() with a single argument behaves the same on Python 2 and 3
    print("{:,} feasible buildings before running developer".format(
        len(dev.feasibility)))

    new_buildings = dev.pick(forms,
                             target_units,
                             parcel_size,
                             ave_unit_size,
                             total_units,
                             max_parcel_size=max_parcel_size,
                             min_unit_size=min_unit_size,
                             drop_after_build=True,
                             residential=residential,
                             bldg_sqft_per_job=bldg_sqft_per_job)

    orca.add_table("feasibility", dev.feasibility)

    if new_buildings is None:
        return

    if len(new_buildings) == 0:
        return new_buildings

    if year is not None:
        new_buildings["year_built"] = year

    if not isinstance(forms, list):
        # NOTE(review): presumably dev.pick only sets "form" itself when it
        # is given a list of forms, so fill it in here for a single form -
        # confirm against developer.Developer.pick
        new_buildings["form"] = forms

    if form_to_btype_callback is not None:
        new_buildings["building_type_id"] = new_buildings.\
            apply(form_to_btype_callback, axis=1)

    new_buildings["stories"] = new_buildings.stories.apply(np.ceil)

    # returned to the caller; captured before the callback and before the
    # frame is cut down to the buildings table's local columns
    ret_buildings = new_buildings
    if add_more_columns_callback is not None:
        new_buildings = add_more_columns_callback(new_buildings)

    print("Adding {:,} buildings with {:,} {}".format(
        len(new_buildings),
        int(new_buildings[supply_fname].sum()),
        supply_fname))

    print("{:,} feasible buildings after running developer".format(
        len(dev.feasibility)))

    old_buildings = buildings.to_frame(buildings.local_columns)
    new_buildings = new_buildings[buildings.local_columns]

    if remove_developed_buildings:
        old_buildings = _remove_developed_buildings(
            old_buildings, new_buildings, unplace_agents)

    all_buildings = dev.merge(old_buildings, new_buildings)

    orca.add_table("buildings", all_buildings)

    if "residential_units" in orca.list_tables() and residential:
        # need to add units to the units table as well
        old_units = orca.get_table("residential_units")
        old_units = old_units.to_frame(old_units.local_columns)

        # use the same integer counts for both columns so their lengths
        # always agree, even if residential_units is stored as float
        unit_counts = new_buildings.residential_units.astype('int32').values
        new_units = pd.DataFrame({
            "unit_residential_price": 0,
            "num_units": 1,
            "deed_restricted": 0,
            "unit_num": np.concatenate(
                [np.arange(int(n)) for n in unit_counts]),
            "building_id": np.repeat(new_buildings.index.values, unit_counts)
        })

        sort_cols = ["building_id", "unit_num"]
        if hasattr(new_units, "sort_values"):
            new_units = new_units.sort_values(by=sort_cols)
        else:
            # older pandas releases only provide DataFrame.sort
            new_units = new_units.sort(columns=sort_cols)
        new_units = new_units.reset_index(drop=True)

        print("Adding {:,} units to the residential_units table".format(
            len(new_units)))
        all_units = dev.merge(old_units, new_units)
        all_units.index.name = "unit_id"

        orca.add_table("residential_units", all_units)

        # pondered returning ret_buildings, new_units but users can get_table
        # the units if they want them - better to avoid breaking the api

    return ret_buildings
Exemple #19
0
def run_developer(forms,
                  agents,
                  buildings,
                  supply_fname,
                  parcel_size,
                  ave_unit_size,
                  total_units,
                  feasibility,
                  year=None,
                  target_vacancy=.1,
                  form_to_btype_callback=None,
                  add_more_columns_callback=None,
                  max_parcel_size=2000000,
                  residential=True,
                  bldg_sqft_per_job=400.0,
                  min_unit_size=400,
                  remove_developed_buildings=True,
                  unplace_agents=['households', 'jobs'],
                  num_units_to_build=None):
    """
    Run the developer model to pick and build buildings

    Parameters
    ----------
    forms : string or list of strings
        Passed directly to dev.pick
    agents : DataFrame Wrapper
        Used to compute the current demand for units/floorspace in the area
    buildings : DataFrame Wrapper
        Used to compute the current supply of units/floorspace in the area
    supply_fname : string
        Identifies the column in buildings which indicates the supply of
        units/floorspace
    parcel_size : Series
        Passed directly to dev.pick
    ave_unit_size : Series
        Passed directly to dev.pick - average residential unit size
    total_units : Series
        Passed directly to dev.pick - total current residential_units /
        job_spaces
    feasibility : DataFrame Wrapper
        The output from feasibility (the table called 'feasibility')
    year : int
        The year of the simulation - will be assigned to 'year_built' on the
        new buildings
    target_vacancy : float
        The target vacancy rate - used to determine how much to build
    form_to_btype_callback : function
        Will be used to convert the 'forms' in the pro forma to
        'building_type_id' in the larger model
    add_more_columns_callback : function
        Takes a dataframe and returns a dataframe - is used to make custom
        modifications to the new buildings that get added
    max_parcel_size : float
        Passed directly to dev.pick - max parcel size to consider
    min_unit_size : float
        Passed directly to dev.pick - min unit size that is valid
    residential : boolean
        Passed directly to dev.pick - switches between adding/computing
        residential_units and job_spaces
    bldg_sqft_per_job : float
        Passed directly to dev.pick - specifies the multiplier between
        floor spaces and job spaces for this form (does not vary by parcel
        as ave_unit_size does)
    remove_developed_buildings : optional, boolean (default True)
        Remove all buildings on the parcels which are being developed on
    unplace_agents : optional, list of strings (default ['households', 'jobs'])
        For all tables in the list, will look for field building_id and set
        it to -1 for buildings which are removed - only executed if
        remove_developed_buildings is true
    num_units_to_build : optional, int
        If num_units_to_build is passed, build this many units rather than
        computing it internally by using the length of agents and the sum of
        the relevant supply column - this trusts the caller to know how to
        compute this. A falsy value (None or 0) falls back to the internal
        computation.

    Returns
    -------
    Writes the result back to the buildings table (and re-registers the
    'feasibility' table) and returns the new buildings with available
    debugging information on each new building. Returns None when dev.pick
    returns None, and the empty result when dev.pick picks no buildings.
    """

    dev = developer.Developer(feasibility.to_frame())

    target_units = num_units_to_build or dev.compute_units_to_build(
        len(agents),
        buildings[supply_fname].sum(),
        target_vacancy)

    # print() with a single argument behaves the same on Python 2 and 3
    print("{:,} feasible buildings before running developer".format(
        len(dev.feasibility)))

    new_buildings = dev.pick(forms,
                             target_units,
                             parcel_size,
                             ave_unit_size,
                             total_units,
                             max_parcel_size=max_parcel_size,
                             min_unit_size=min_unit_size,
                             drop_after_build=True,
                             residential=residential,
                             bldg_sqft_per_job=bldg_sqft_per_job)

    orca.add_table("feasibility", dev.feasibility)

    if new_buildings is None:
        return

    if len(new_buildings) == 0:
        return new_buildings

    if year is not None:
        new_buildings["year_built"] = year

    if not isinstance(forms, list):
        # NOTE(review): presumably dev.pick only sets "form" itself when it
        # is given a list of forms, so fill it in here for a single form -
        # confirm against developer.Developer.pick
        new_buildings["form"] = forms

    if form_to_btype_callback is not None:
        new_buildings["building_type_id"] = new_buildings.\
            apply(form_to_btype_callback, axis=1)

    new_buildings["stories"] = new_buildings.stories.apply(np.ceil)

    # returned to the caller; captured before the callback and before the
    # frame is cut down to the buildings table's local columns
    ret_buildings = new_buildings
    if add_more_columns_callback is not None:
        new_buildings = add_more_columns_callback(new_buildings)

    print("Adding {:,} buildings with {:,} {}".format(
        len(new_buildings),
        int(new_buildings[supply_fname].sum()),
        supply_fname))

    print("{:,} feasible buildings after running developer".format(
        len(dev.feasibility)))

    old_buildings = buildings.to_frame(buildings.local_columns)
    new_buildings = new_buildings[buildings.local_columns]

    if remove_developed_buildings:
        old_buildings = _remove_developed_buildings(
            old_buildings, new_buildings, unplace_agents)

    all_buildings = dev.merge(old_buildings, new_buildings)

    orca.add_table("buildings", all_buildings)

    if "residential_units" in orca.list_tables() and residential:
        # need to add units to the units table as well
        old_units = orca.get_table("residential_units")
        old_units = old_units.to_frame(old_units.local_columns)

        # use the same integer counts for both columns so their lengths
        # always agree, even if residential_units is stored as float
        unit_counts = new_buildings.residential_units.astype('int32').values
        new_units = pd.DataFrame({
            "unit_residential_price": 0,
            "num_units": 1,
            "deed_restricted": 0,
            "unit_num": np.concatenate(
                [np.arange(int(n)) for n in unit_counts]),
            "building_id": np.repeat(new_buildings.index.values, unit_counts)
        })

        sort_cols = ["building_id", "unit_num"]
        if hasattr(new_units, "sort_values"):
            new_units = new_units.sort_values(by=sort_cols)
        else:
            # older pandas releases only provide DataFrame.sort
            new_units = new_units.sort(columns=sort_cols)
        new_units = new_units.reset_index(drop=True)

        print("Adding {:,} units to the residential_units table".format(
            len(new_units)))
        all_units = dev.merge(old_units, new_units)
        all_units.index.name = "unit_id"

        orca.add_table("residential_units", all_units)

        # pondered returning ret_buildings, new_units but users can get_table
        # the units if they want them - better to avoid breaking the api

    return ret_buildings
Exemple #20
0
def validate_table(table, reciprocal=True):
    """
    Check some basic expectations about an Orca table:

    - Confirm that it includes a unique, named index column (a.k.a. primary
      key) or set of columns (multi-index, a.k.a. composite key). If not,
      raise a ValueError.

    - Confirm that none of the other columns in the table share names with the
      index(es). If they do, raise a ValueError.

    - If the table contains columns whose names match the index columns of
      other tables registered with Orca, check whether they make sense as join
      keys. This prints a status message with the number of presumptive
      foreign-key values that are found in the primary/composite key, for
      evaluation by the user.

    - Perform the same check for columns in _other_ tables whose names match
      the index column(s) of _this_ table.

    - It doesn't currently compare indexes to indexes. (Maybe it should?)

    Running this will trigger loading all registered Orca tables, which may
    take a while. Stand-alone columns will not be loaded unless their names
    match an index column.

    Doesn't currently incorporate ``orca_test`` validation, but it might be
    added.

    Parameters
    ----------
    table : str
        Name of Orca table to validate.

    reciprocal : bool, default True
        Whether to also check how columns of other tables align with this
        one's index. If False, only check this table's columns against other
        tables' indexes.

    Returns
    -------
    bool
        Always True; problems are reported by raising.

    Raises
    ------
    ValueError
        If the table is not registered, its index has no name, an index name
        is also a column name, or the index values are not unique.

    """
    # There are a couple of reasons we're not using the orca_test library here:
    # (a) orca_test doesn't currently support MultiIndexes, and (b) the primary-key/
    # foreign-key comparisons aren't asserting anything, just printing status
    # messages. We should update orca_test to support both, probably.

    if not orca.is_table(table):
        raise ValueError("Table not registered with Orca: '{}'".format(table))

    idx = orca.get_table(table).index

    # Check index has a name
    if list(idx.names) == [None]:
        raise ValueError("Index column has no name")

    # Check for unique column names
    own_columns = list(orca.get_table(table).columns)
    for name in list(idx.names):
        if name in own_columns:
            raise ValueError(
                "Index names and column names overlap: '{}'".format(name))

    # Check for unique index values
    if len(idx.unique()) < len(idx):
        raise ValueError("Index not unique")

    # Compare columns to indexes of other tables, and vice versa
    others = [t for t in orca.list_tables() if table != t]
    combinations = [(table, t) for t in others]

    if reciprocal:
        combinations += [(t, table) for t in others]

    for t1, t2 in combinations:
        col_names = orca.get_table(t1).columns
        idx = orca.get_table(t2).index

        if not set(idx.names).issubset(col_names):
            continue

        vals = orca.get_table(t1).to_frame(idx.names).drop_duplicates()

        # Easier to compare multi-column values to multi-column index if we
        # turn the values into an index as well
        vals = vals.reset_index().set_index(idx.names).index
        n_vals = len(vals)

        if n_vals:
            vals_in_idx = vals.isin(idx).sum()
            pct = round(100*vals_in_idx/n_vals)
        else:
            # An empty table has nothing to match; report 0% instead of
            # failing with a ZeroDivisionError
            vals_in_idx = 0
            pct = 0

        if len(idx.names) == 1:
            idx_str = idx.names[0]
        else:
            idx_str = '[{}]'.format(','.join(str(n) for n in idx.names))

        print("'{}.{}': {} of {} unique values are found in '{}.{}' ({}%)"
              .format(t1, idx_str,
                      vals_in_idx, n_vals,
                      t2, idx_str,
                      pct))

    return True
Exemple #21
0
def tables_in_base_year():
    """
    Return the names of registered Orca tables that are present in the HDF
    store named by the ``store`` entry of ``settings.yaml``.

    A table counts as present when either its own name, or the name it maps
    to in the ``store_table_names_dict`` injectable, is found in the store.
    """
    settings = yamlio.yaml_to_dict(
        str_or_buffer=os.path.join(misc.configs_dir(), "settings.yaml"))
    store_path = os.path.join(misc.data_dir(), settings['store'])
    store_table_names = orca.get_injectable('store_table_names_dict')

    h5store = pd.HDFStore(store_path, mode="r")
    try:
        return [
            t for t in orca.list_tables()
            if t in h5store or store_table_names.get(t, None) in h5store
        ]
    finally:
        # the store was previously left open; always release the file handle
        h5store.close()
Exemple #22
0
def test_table_list(expected_tables):
    """
    Check that the tables registered with Orca equal ``expected_tables``.

    The registered names are printed first so they are visible in the test
    output.
    """
    registered = orca.list_tables()
    print(registered)
    assert registered == expected_tables
Exemple #23
0
def run_developer(forms, agents, buildings, supply_fname, parcel_size,
                  ave_unit_size, total_units, feasibility,
                  max_dua_zoning, max_res_units, addl_units, year=None,
                  target_vacancy=.1, use_max_res_units=False,
                  form_to_btype_callback=None,
                  add_more_columns_callback=None, max_parcel_size=2000000,
                  residential=True, bldg_sqft_per_job=400.0,
                  min_unit_size=400, remove_developed_buildings=True,
                  unplace_agents=['households', 'jobs'],
                  num_units_to_build=None, profit_to_prob_func=None):
    """
    Run the developer model to pick and build buildings

    Parameters
    ----------
    forms : string or list of strings
        Passed directly to dev.pick
    agents : DataFrame Wrapper
        Used to compute the current demand for units/floorspace in the area
    buildings : DataFrame Wrapper
        Used to compute the current supply of units/floorspace in the area
    supply_fname : string
        Identifies the column in buildings which indicates the supply of
        units/floorspace
    parcel_size : Series
        Passed directly to dev.pick
    ave_unit_size : Series
        Average residential unit size. Only stored on the working copy of
        the residential feasibility frame; dev.pick receives the derived
        'unit_size_from_final' column instead.
    total_units : Series
        Passed directly to dev.pick - total current residential_units /
        job_spaces
    feasibility : DataFrame Wrapper
        The output from feasibility above (the table called 'feasibility')
    max_dua_zoning, max_res_units, addl_units : Series
        Stored as columns of the same name on the working copy of the
        residential feasibility frame (presumably indexed like that frame -
        TODO confirm)
    year : int
        The year of the simulation - will be assigned to 'year_built' on the
        new buildings
    target_vacancy : float
        The target vacancy rate - used to determine how much to build
    use_max_res_units : boolean
        Currently not used by this function
    form_to_btype_callback : function
        Will be used to convert the 'forms' in the pro forma to
        'building_type_id' in the larger model
    add_more_columns_callback : function
        Takes a dataframe and returns a dataframe - is used to make custom
        modifications to the new buildings that get added
    max_parcel_size : float
        Passed directly to dev.pick - max parcel size to consider
    min_unit_size : float
        Passed directly to dev.pick - min unit size that is valid
    residential : boolean
        Passed directly to dev.pick - switches between adding/computing
        residential_units and job_spaces
    bldg_sqft_per_job : float
        Passed directly to dev.pick - specified the multiplier between
        floor spaces and job spaces for this form (does not vary by parcel
        as ave_unit_size does)
    remove_developed_buildings : optional, boolean (default True)
        Remove all buildings on the parcels which are being developed on
    unplace_agents : optional, list of strings (default ['households', 'jobs'])
        Passed to utils._remove_developed_buildings; for all tables in the
        list, will look for field building_id and set it to -1 for buildings
        which are removed - only executed if remove_developed_buildings is
        true
    num_units_to_build : optional, int
        If num_units_to_build is passed (and is non-zero), build this many
        units rather than computing it internally by using the length of
        agents and the sum of the relevant supply column - this trusts the
        caller to know how to compute this.
    profit_to_prob_func : func
        Passed directly to dev.pick

    Returns
    -------
    Writes the result back to the buildings table (and, for residential
    runs, to the residential_units table when it is registered) and returns
    the new buildings with available debugging information on each new
    building. Returns None when dev.pick returns None, and the empty result
    of dev.pick when no buildings were picked.
    """
    dev = developer.Developer(feasibility.to_frame())

    target_units = num_units_to_build or dev.compute_units_to_build(
        len(agents), buildings[supply_fname].sum(), target_vacancy)

    print("{:,} feasible buildings before running developer".format(
        len(dev.feasibility)))

    # Keep the original profit in 'max_profit_orig' and replace negative
    # profits by a small positive value. A single .loc call is used so the
    # write is guaranteed to land in dev.feasibility itself rather than in
    # a temporary produced by chained indexing.
    df = dev.feasibility
    df['residential', 'max_profit_orig'] = df['residential', 'max_profit']
    negative_profit = df['residential', 'max_profit_orig'] < 0
    df.loc[negative_profit, ('residential', 'max_profit')] = .001
    orca.add_table("feasibility", df)

    parcels = orca.get_table('parcels').to_frame()

    # Work on an explicit copy: the columns added below are scratch values
    # and must not be written onto a slice of the feasibility frame.
    df = df['residential'].copy()
    settings = orca.get_injectable('settings')
    df["parcel_size"] = parcel_size
    df["ave_unit_size"] = ave_unit_size
    df['current_units'] = total_units
    df['max_dua_zoning'] = max_dua_zoning
    df['max_res_units'] = max_res_units
    df['addl_units'] = addl_units
    df['zoning_id'] = parcels.zoning_id
    df['siteid'] = parcels.siteid
    df['zoning_schedule_id'] = parcels.zoning_schedule_id
    df['acres'] = parcels.parcel_acres
    df['land_cost_per_sqft'] = settings['default_land_cost']
    df['cap_rate'] = settings['sqftproforma_config']['cap_rate']
    df['building_efficiency'] = \
        settings['sqftproforma_config']['building_efficiency']
    df['min_size_per_unit'] = min_unit_size
    df['max_dua_from_zoning'] = df['max_dua_zoning']
    df['development_type_id'] = parcels.development_type_id
    df = df[df.parcel_size < max_parcel_size].copy()

    # The calculation of 'units_from_zoning' from max_dua / max_res_units
    # zoning is disabled ("schedule 2 only" mode); units are derived from
    # the minimum unit size instead.
    df['units_from_min_unit_size'] = \
        (df['residential_sqft'] / min_unit_size).round()

    # This is the only place 'units_from_zoning' is populated: 0 on rows
    # with a positive siteid, unset (NaN) everywhere else.
    df.loc[(df['siteid'] > 0), 'units_from_zoning'] = 0

    # Overwritten a few lines below; kept so the same columns are required.
    df['final_units_constrained_by_size'] = \
        df[['addl_units', 'units_from_min_unit_size']].min(axis=1)

    # NOTE(review): with 'units_from_zoning' being only 0 or NaN this
    # division cannot yield a finite unit size - confirm this is intended.
    df['unit_size_from_final'] = \
        df['residential_sqft'] / df['units_from_zoning']

    df.loc[(df['unit_size_from_final'] < min_unit_size),
           'unit_size_from_final'] = min_unit_size

    df['final_units_constrained_by_size'] = \
        (df['residential_sqft'] / df['unit_size_from_final']).round()
    df['net_units'] = df.final_units_constrained_by_size

    df['roi'] = df['max_profit'] / df['total_cost']

    df = df.reset_index(drop=False)
    df = df.set_index(['parcel_id'])

    unit_size_from_final = df.unit_size_from_final

    new_buildings = dev.pick(forms,
                             target_units,
                             parcel_size,
                             unit_size_from_final,
                             total_units,
                             max_parcel_size=max_parcel_size,
                             min_unit_size=min_unit_size,
                             drop_after_build=False,
                             residential=residential,
                             bldg_sqft_per_job=bldg_sqft_per_job,
                             profit_to_prob_func=profit_to_prob_func)

    # Re-register the feasibility frame as it stands after dev.pick.
    orca.add_table("feasibility", dev.feasibility)

    if new_buildings is None:
        return None

    if len(new_buildings) == 0:
        return new_buildings

    if year is not None:
        new_buildings["year_built"] = year

    if not isinstance(forms, list):
        # A single form was requested, so label every new building with it
        # (presumably dev.pick fills 'form' itself for a list - TODO confirm)
        new_buildings["form"] = forms

    if form_to_btype_callback is not None:
        new_buildings["building_type_id"] = \
            new_buildings.apply(form_to_btype_callback, axis=1)

    new_buildings["stories"] = new_buildings.stories.apply(np.ceil)

    # ret_buildings keeps every column from dev.pick for debugging, while
    # new_buildings is narrowed to the buildings table's local columns.
    ret_buildings = new_buildings
    if add_more_columns_callback is not None:
        new_buildings = add_more_columns_callback(new_buildings)

    print("Adding {:,} buildings with {:,} {}".format(
        len(new_buildings),
        int(new_buildings['net_units'].sum()),
        supply_fname))

    print("{:,} feasible buildings after running developer".format(
        len(dev.feasibility)))

    old_buildings = buildings.to_frame(buildings.local_columns)
    new_buildings = new_buildings[buildings.local_columns].copy()
    new_buildings['new_bldg'] = True
    new_buildings['sch_dev'] = False
    new_buildings['new_units'] = new_buildings['residential_units']

    if remove_developed_buildings:
        old_buildings = utils._remove_developed_buildings(
            old_buildings, new_buildings, unplace_agents)

    all_buildings, new_index = dev.merge(old_buildings, new_buildings,
                                         return_index=True)
    ret_buildings.index = new_index

    orca.add_table("buildings", all_buildings)

    if "residential_units" in orca.list_tables() and residential:
        # need to add units to the units table as well
        old_units = orca.get_table("residential_units")
        old_units = old_units.to_frame(old_units.local_columns)

        # Use one integer count per building for both columns so that
        # 'unit_num' and 'building_id' always have the same length.
        unit_counts = new_buildings.residential_units.astype('int32').values

        # NOTE(review): building_id comes from new_buildings.index, whereas
        # the merged index is only assigned to ret_buildings above -
        # confirm the two agree.
        new_units = pd.DataFrame({
            "unit_residential_price": 0,
            "num_units": 1,
            "deed_restricted": 0,
            "unit_num": np.concatenate(
                [np.arange(count) for count in unit_counts]),
            "building_id": np.repeat(new_buildings.index.values,
                                     unit_counts)
        }).sort_values(by=["building_id", "unit_num"]).reset_index(drop=True)

        print("Adding {:,} units to the residential_units table".format(
            len(new_units)))
        all_units = dev.merge(old_units, new_units)
        all_units.index.name = "unit_id"

        orca.add_table("residential_units", all_units)
        # pondered returning ret_buildings, new_units but users can get_table
        # the units if they want them - better to avoid breaking the api

    return ret_buildings