Ejemplo n.º 1
0
def ual_initialize_new_units(buildings, residential_units):
    """
    Data maintenance step: create unit records for residential buildings that
    have no rows yet in the 'residential_units' table.

    Data expectations
    -----------------
    - 'buildings' table has the following columns:
        - index that serves as its identifier
        - 'residential_units' (int, count of units in building)
    - 'residential_units' table has the following columns:
        - index named 'unit_id' that serves as its identifier
        - 'building_id' corresponding to the index of the 'buildings' table

    Results
    -------
    - re-registers the 'residential_units' table containing the existing rows
      plus the newly created ones; new rows are produced by
      '_ual_create_empty_units', the same helper used by the
      'ual_initialize_residential_units' model step
    """

    # Check the incoming tables before touching them
    buildings_spec = TableSpec(
        'buildings',
        ColumnSpec('building_id', primary_key=True),
        ColumnSpec('residential_units', min=0))
    units_spec = TableSpec(
        'residential_units',
        ColumnSpec('unit_id', primary_key=True),
        ColumnSpec('building_id', foreign_key='buildings.building_id'))
    ot.assert_orca_spec(OrcaSpec('', buildings_spec, units_spec))

    existing_units = residential_units.to_frame(
        residential_units.local_columns)
    unit_counts = buildings.to_frame(['residential_units'])

    # Keep buildings that contain units but are absent from the units table
    residential = unit_counts[unit_counts.residential_units > 0]
    already_covered = residential.index.isin(existing_units.building_id)
    buildings_to_add = residential[~already_covered]

    # Build the new unit rows and append them to the existing ones
    added_units = _ual_create_empty_units(buildings_to_add)
    combined = dev.merge(existing_units, added_units)
    combined.index.name = 'unit_id'

    print("Creating %d residential units for %d new buildings" %
          (len(added_units), len(buildings_to_add)))

    orca.add_table('residential_units', combined)

    # Check the table that was just registered
    final_units_spec = TableSpec(
        'residential_units',
        ColumnSpec('unit_id', primary_key=True))
    ot.assert_orca_spec(OrcaSpec('', final_units_spec))
    return
Ejemplo n.º 2
0
def ual_remove_old_units(buildings, residential_units):
    """
    Data maintenance step: drop units whose 'building_id' is no longer present
    in the 'buildings' table.

    Caveat: if a newly created building re-uses the id of a building that was
    removed, the old building's units will NOT be dropped by this step.
    Hopefully new buildings never duplicate prior ids, but this still needs
    to be verified.

    Data expectations
    -----------------
    - 'buildings' table has an index that serves as its identifier
    - 'residential_units' table has a column 'building_id' corresponding to
      the index of the 'buildings' table

    Results
    -------
    - re-registers the 'residential_units' table without the rows whose
      'building_id' is missing from the 'buildings' index
    """

    # Check the incoming tables
    buildings_spec = TableSpec(
        'buildings',
        ColumnSpec('building_id', primary_key=True))
    units_spec = TableSpec(
        'residential_units',
        ColumnSpec('building_id', numeric=True))
    ot.assert_orca_spec(OrcaSpec('', buildings_spec, units_spec))

    all_units = residential_units.to_frame(residential_units.local_columns)
    still_valid = all_units.building_id.isin(buildings.index)
    kept_units = all_units[still_valid]

    # Report how many units, and how many distinct buildings, are going away
    n_units_removed = len(all_units) - len(kept_units)
    n_buildings_removed = (len(all_units.groupby('building_id')) -
                           len(kept_units.groupby('building_id')))
    print("Removing %d residential units from %d buildings that no longer exist" %
          (n_units_removed, n_buildings_removed))

    orca.add_table('residential_units', kept_units)

    # Every remaining unit must now point at an existing building
    final_units_spec = TableSpec(
        'residential_units',
        ColumnSpec('building_id', foreign_key='buildings.building_id'))
    ot.assert_orca_spec(OrcaSpec('', final_units_spec))
    return
Ejemplo n.º 3
0
def ual_assign_tenure_to_new_units(residential_units, ual_settings):
    """
    This data maintenance step assigns tenure to new residential units. Tenure
    is determined by comparing the fitted sale price and fitted rent from the
    hedonic models, with rents adjusted to price-equivalent terms using a cap
    rate.

    We may want to make this more sophisticated in the future, or at least
    stochastic. Also, it might be better to do this assignment based on the
    zonal average prices and rents following supply/demand equilibration.

    Data expectations
    -----------------
    - 'residential_units' table has the following columns:
        - 'hownrent' (int in range 1 to 2, may be missing)
        - 'unit_residential_price' (float, non-missing)
        - 'unit_residential_rent' (float, non-missing)
    - 'ual_settings' provides a 'cap_rate' entry used as the divisor when
      converting rent to a price-equivalent value

    Results
    -------
    - fills missing values of 'hownrent' (1 when the price is at least the
      adjusted rent, 2 when the adjusted rent is higher)
    """

    # Verify initial data characteristics
    ot.assert_orca_spec(
        OrcaSpec(
            '',
            TableSpec(
                'residential_units',
                ColumnSpec('hownrent', min=1, max=2,
                           missing_val_coding=np.nan),
                ColumnSpec('unit_residential_price', min=0),
                ColumnSpec('unit_residential_rent', min=0))))

    cols = ['hownrent', 'unit_residential_price', 'unit_residential_rent']
    units = residential_units.to_frame(cols)

    # Filter for units that are missing a tenure assignment. Take an explicit
    # copy: the columns of this subset are overwritten below, and assigning
    # into the bare result of a boolean filter is flagged by pandas as chained
    # assignment (SettingWithCopyWarning).
    units = units[~units.hownrent.isin([1, 2])].copy()

    # Convert monthly rent to equivalent sale price
    cap_rate = ual_settings.get('cap_rate')
    units['unit_residential_rent'] = \
        units.unit_residential_rent * 12 / cap_rate

    # Assign tenure based on higher of price or adjusted rent
    rental_units = (units.unit_residential_rent > units.unit_residential_price)
    units.loc[~rental_units, 'hownrent'] = 1
    units.loc[rental_units, 'hownrent'] = 2

    # Single-argument parenthesised prints behave the same under Python 2
    # and Python 3
    print("Adding tenure assignment to %d new residential units" % len(units))
    print(units.describe())

    # Write the new assignments back; only the rows handled above are touched
    residential_units.update_col_from_series('hownrent',
                                             units.hownrent,
                                             cast=True)
    return
Ejemplo n.º 4
0
def ual_households_relocation(households, ual_settings):
    """
    Model step: randomly pick households for relocation, with probabilities
    that depend on their tenure status, and un-place the chosen households.

    Data expectations
    -----------------
    - 'households' table has following columns:
        - 'hownrent' (int in range [1,2], non-missing)
        - 'building_id' (int, '-1'-filled, corresponds to index of
          'buildings' table)
        - 'unit_id' (int, '-1'-filled, corresponds to index of
          'residential_units' table)
    - 'ual_settings.yaml' has:
        - 'relocation_rates' as specified in RelocationModel() documentation

    Results
    -------
    - marks the selected households for relocation by setting both their
      'building_id' and 'unit_id' to -1
    """

    # Check the households table before selecting movers
    households_spec = TableSpec(
        'households',
        ColumnSpec('hownrent', numeric=True, min=1, max=2, missing=False),
        ColumnSpec('building_id', numeric=True, missing_val_coding=-1),
        ColumnSpec('unit_id', numeric=True, missing_val_coding=-1))
    ot.assert_orca_spec(OrcaSpec('', households_spec))

    relocation_rates = pd.DataFrame.from_dict(
        ual_settings['relocation_rates'])

    print("Total agents: %d" % len(households))
    print("Total currently unplaced: %d" % (households.unit_id == -1).sum())
    print("Assigning for relocation...")

    # Let the relocation model choose the movers
    model = RelocationModel(relocation_rates)
    candidates = households.to_frame(['unit_id', 'hownrent'])
    mover_ids = model.find_movers(candidates)

    # Un-place the movers from their building first, then from their unit
    for column in ('building_id', 'unit_id'):
        households.update_col_from_series(column,
                                          pd.Series(-1, index=mover_ids),
                                          cast=True)

    print("Total currently unplaced: %d" % (households.unit_id == -1).sum())
    return
Ejemplo n.º 5
0
def ual_initialize_residential_units(buildings, ual_settings):
    """
    Initialization step: register a table of synthetic residential units that
    is derived from the 'buildings' table, plus the column, broadcast and
    injectable that go with it.

    Data expectations
    -----------------
    - 'buildings' table has following columns:
        - index that serves as its id
        - 'residential_units' (int, never missing)
        - 'zone_id' (int, non-missing??)
    - no 'residential_units' table is registered yet
    - 'ual_settings' injectable contains list of tables called
      'unit_aggregation_tables'

    Results
    -------
    - registers a cached 'residential_units' table with the following columns:
        - 'unit_id' (index)
        - 'num_units' (int, always '1', needed when passing the table to
          utility functions that expect it to look like a 'buildings' table)
        - 'unit_residential_price' (float, 0-filled)
        - 'unit_residential_rent' (float, 0-filled)
        - 'building_id' (int, non-missing, corresponds to index of
          'buildings' table)
        - 'unit_num' (int, non-missing, unique within building)
        - 'submarket_id' (int, non-missing, computed, corresponds to index of
          'zones' table)
    - adds a broadcast linking the 'residential_units' table to the
      'buildings' table
    - registers a 'unit_aggregations' injectable containing the tables named
      in 'ual_settings' -> 'unit_aggregation_tables'
    """

    # Preconditions: buildings are well-formed and the units table is new
    initial_spec = OrcaSpec(
        '',
        TableSpec(
            'buildings',
            ColumnSpec('building_id', primary_key=True),
            ColumnSpec('residential_units', min=0, missing=False),
            ColumnSpec('zone_id', foreign_key='zones.zone_id',
                       missing=False)),
        TableSpec('residential_units', registered=False),
        InjectableSpec('ual_settings', has_key='unit_aggregation_tables'))
    ot.assert_orca_spec(initial_spec)

    @orca.table('residential_units', cache=True)
    def residential_units(buildings):
        return _ual_create_empty_units(buildings)

    @orca.column('residential_units', 'submarket_id')
    def submarket_id(residential_units, buildings):
        # The submarket drives supply/demand equilibration. It carries the
        # same value as the building's zone_id, but lives in its own column
        # so that merging tables does not produce a name conflict.
        zone_by_building = buildings.zone_id
        return misc.reindex(zone_by_building, residential_units.building_id)

    orca.broadcast('buildings', 'residential_units',
                   cast_index=True, onto_on='building_id')

    # The list of tables needed by the hedonic and LCM model steps. It can
    # only be evaluated after the network aggregation steps have run.
    @orca.injectable('unit_aggregations')
    def unit_aggregations(ual_settings):
        table_names = ual_settings['unit_aggregation_tables']
        return [orca.get_table(name) for name in table_names]

    # Postconditions: the units table exists with the documented schema
    final_spec = OrcaSpec(
        '',
        TableSpec(
            'residential_units',
            ColumnSpec('unit_id', primary_key=True),
            ColumnSpec('num_units', min=1, max=1, missing=False),
            ColumnSpec('unit_residential_price', min=0, missing=False),
            ColumnSpec('unit_residential_rent', min=0, missing=False),
            ColumnSpec('building_id', foreign_key='buildings.building_id',
                       missing=False),
            ColumnSpec('unit_num', min=0, missing=False),
            ColumnSpec('submarket_id', foreign_key='zones.zone_id',
                       missing=False)))
    ot.assert_orca_spec(final_spec)
    return
Ejemplo n.º 6
0
def ual_update_building_residential_price(buildings, residential_units,
                                          ual_settings):
    """
    Data maintenance step: push unit-level prices up to the 'buildings' table
    so that model steps like 'price_vars' and 'feasibility' can read prices
    directly from the buildings.

    For each building the new price is the larger of (a) the mean unit price
    and (b) the mean unit rent converted to a price-equivalent value using the
    cap rate.

    Data expectations
    -----------------
    - 'residential_units' table has following columns:
        - 'unit_residential_price' (float, 0-filled)
        - 'unit_residential_rent' (float, 0-filled)
        - 'building_id' (int, non-missing, corresponds to index of
          'buildings' table)
    - 'buildings' table has following columns:
        - index that serves as its id
        - 'residential_price' (float, 0-filled)
    - 'ual_settings' injectable has a 'cap_rate' (float, range 0 to 1)

    Results
    -------
    - updates the 'buildings' table:
        - 'residential_price' = max of the averaged unit prices or adjusted
          rents, for buildings that have rows in the units table
    """

    # Preconditions on both tables and on the settings injectable.
    # NOTE(review): the min/max bounds below are attached to the
    # 'ual_settings' injectable itself, whereas the documented range applies
    # to its 'cap_rate' entry -- confirm this is what the spec checks.
    units_spec = TableSpec(
        'residential_units',
        ColumnSpec('unit_residential_price', min=0),
        ColumnSpec('unit_residential_rent', min=0),
        ColumnSpec('building_id', foreign_key='buildings.building_id',
                   missing=False))
    buildings_spec = TableSpec(
        'buildings',
        ColumnSpec('building_id', primary_key=True),
        ColumnSpec('residential_price', min=0))
    settings_spec = InjectableSpec('ual_settings', min=0, max=1)
    ot.assert_orca_spec(
        OrcaSpec('', units_spec, buildings_spec, settings_spec))

    # Average unit price and unit rent within each building
    unit_cols = ['building_id', 'unit_residential_price',
                 'unit_residential_rent']
    unit_frame = residential_units.to_frame(unit_cols)
    bldg_means = unit_frame.groupby(['building_id']).mean()

    # Monthly rent -> yearly rent -> price-equivalent value
    cap_rate = ual_settings.get('cap_rate')
    yearly_rent = bldg_means.unit_residential_rent * 12
    bldg_means['unit_residential_rent'] = yearly_rent / cap_rate

    # Row-wise maximum over the price and adjusted-rent columns
    bldg_means['max_potential'] = bldg_means.max(axis=1)
    print(bldg_means.describe())

    # Write the result back to the buildings table
    buildings.update_col_from_series('residential_price',
                                     bldg_means.max_potential,
                                     cast=True)

    # Postcondition: building prices are still non-negative
    final_spec = TableSpec('buildings',
                           ColumnSpec('residential_price', min=0))
    ot.assert_orca_spec(OrcaSpec('', final_spec))
    return
Ejemplo n.º 7
0
def reconcile_unplaced_households(households):
    """
    Data maintenance step: keep the building/unit/household correspondence
    consistent for households that are not matched with housing.

    In the current data model an unplaced household should carry -1 in both
    'building_id' and 'unit_id'. Sometimes only one of the two is set when
    households are created or unplaced. This step propagates the -1 in either
    direction: households without a building lose their unit, and households
    without a unit lose their building.

    Data expectations
    -----------------
    - 'households' table has an index, and these columns:
        - 'unit_id' (int, '-1'-filled)
        - 'building_id' (int, '-1'-filled)

    Results
    -------
    - updates the 'households' table:
        - 'unit_id' = 'building_id' = -1 for the superset of rows where
          either column initially had this value
    """

    # Check the households table before reconciling
    initial_spec = TableSpec(
        'households',
        ColumnSpec('unit_id', numeric=True, missing_val_coding=-1),
        ColumnSpec('building_id', numeric=True, missing_val_coding=-1))
    ot.assert_orca_spec(OrcaSpec('', initial_spec))

    def _report_counts():
        # Counts are read from the live table, so calling this again after
        # the updates shows the reconciled state.
        print("Households not in a unit: %d" %
              (households.unit_id == -1).sum())
        print("Househing missing a unit: %d" %
              households.unit_id.isnull().sum())
        print("Households not in a building: %d" %
              (households.building_id == -1).sum())
        print("Househing missing a building: %d" %
              households.building_id.isnull().sum())

    _report_counts()
    print("Reconciling unplaced households...")
    placement = households.to_frame(['building_id', 'unit_id'])

    # Household ids lacking a building, and household ids lacking a unit.
    # Both are taken from the same snapshot, before any update is applied.
    without_building = placement.index[(placement.building_id == -1).values]
    without_unit = placement.index[(placement.unit_id == -1).values]
    clear_units = pd.Series(-1, index=without_building)
    clear_buildings = pd.Series(-1, index=without_unit)

    # Cross-apply so that both columns end up at -1 for either group
    households.update_col_from_series('building_id', clear_buildings,
                                      cast=True)
    households.update_col_from_series('unit_id', clear_units, cast=True)
    _report_counts()

    # Check the reconciled ids against the tables they refer to
    final_spec = TableSpec(
        'households',
        ColumnSpec('unit_id', foreign_key='residential_units.unit_id',
                   missing_val_coding=-1),
        ColumnSpec('building_id', foreign_key='buildings.building_id',
                   missing_val_coding=-1))
    ot.assert_orca_spec(OrcaSpec('', final_spec))
    return
Ejemplo n.º 8
0
def reconcile_placed_households(households, residential_units):
    """
    Data maintenance step: keep the building/unit/household correspondence
    consistent for households that have been matched with housing.

    In the current data model a placed household should carry both a
    'building_id' and a 'unit_id'. The existing HLCM models assign only a
    'unit_id', so this step looks up the matching 'building_id' in the units
    table and fills it in.

    Data expectations
    -----------------
    - 'households' table has the following columns:
        - index 'household_id'
        - 'unit_id' (int, '-1'-filled)
        - 'building_id' (int, '-1'-filled)
    - 'residential_units' table has the following columns:
        - index 'unit_id'
        - 'building_id' (int, non-missing, corresponds to index of the
          'buildings' table)

    Results
    -------
    - updates the 'households' table:
        - 'building_id' updated where it was -1 but 'unit_id' wasn't
    """

    # The initial spec check is currently disabled; kept here for reference.
    #
    # ot.assert_orca_spec(OrcaSpec('',
    #     TableSpec('households',
    #         ColumnSpec('household_id', primary_key=True),
    #         ColumnSpec('unit_id', foreign_key='residential_units.unit_id', missing_val_coding=-1),
    #         ColumnSpec('building_id', foreign_key='buildings.building_id', missing_val_coding=-1)),
    #     TableSpec('residential_units',
    #         ColumnSpec('unit_id', primary_key=True),
    #         ColumnSpec('building_id', foreign_key='buildings.building_id', missing=False))))

    # Name the household index so that it survives reset_index() as a
    # 'household_id' column and can be restored after the merge.
    movers = households.to_frame(['unit_id', 'building_id'])
    movers.index.rename('household_id', inplace=True)
    movers = movers.reset_index()
    print("hh columns: %s" % movers.columns)

    # unit_id -> building_id mapping, with unit_id as a regular column
    unit_to_building = residential_units.to_frame(
        ['building_id']).reset_index()

    # Households that have a unit but no building yet; the stale
    # 'building_id' column is dropped so the merge can supply the real one
    lacks_building = movers.building_id == -1
    has_unit = movers.unit_id != -1
    movers = movers[lacks_building & has_unit].drop('building_id', axis=1)

    # NOTE(review): with a left join, a unit_id that is absent from the units
    # table yields a missing building_id -- confirm upstream guarantees.
    movers = pd.merge(movers, unit_to_building, on='unit_id', how='left')
    movers = movers.set_index('household_id')
    print("hh index.names: %s" % movers.index.names)

    print("%d movers updated" % len(movers))
    households.update_col_from_series('building_id', movers.building_id,
                                      cast=True)

    # Check the updated building ids against the buildings table
    final_spec = TableSpec(
        'households',
        ColumnSpec('building_id', foreign_key='buildings.building_id',
                   missing_val_coding=-1))
    ot.assert_orca_spec(OrcaSpec('', final_spec))
    return
Ejemplo n.º 9
0
def ual_assign_tenure_to_units(residential_units, households):
    """
    This initialization step assigns tenure to residential units, based on the
    'hownrent' attribute of the households occupying them. (Tenure for
    unoccupied units is assigned randomly.)

    Data expectations
    -----------------
    - 'residential_units' table has NO column 'hownrent'
    - 'households' table has following columns:
        - 'hownrent' (int, missing values ok)
        - 'unit_id' (int, '-1'-filled, corresponds to index of
          'residential_units' table)

    Results
    -------
    - re-registers the 'residential_units' table with an added column:
        - 'hownrent' (values in range [1,2], non-missing)
    """

    # Verify initial data characteristics
    ot.assert_orca_spec(
        OrcaSpec(
            '',
            TableSpec('residential_units',
                      ColumnSpec('hownrent', registered=False)),
            TableSpec(
                'households',
                ColumnSpec('hownrent', min=1, max=2,
                           missing_val_coding=np.nan),
                ColumnSpec('unit_id',
                           foreign_key='residential_units.unit_id',
                           missing_val_coding=-1))))

    units = residential_units.to_frame(residential_units.local_columns)
    hh = households.to_frame(['hownrent', 'unit_id'])

    # 'Hownrent' is a PUMS field where 1=owns, 2=rents. Note that there's also
    # a field in the MTC households table called 'tenure', with min=1, max=4,
    # mean=2. Not sure where this comes from or what the values indicate.

    # Start with every unit unassigned, then copy tenure over from the
    # households that occupy a unit
    units['hownrent'] = np.nan
    own = hh[(hh.hownrent == 1) & (hh.unit_id != -1)].unit_id.values
    rent = hh[(hh.hownrent == 2) & (hh.unit_id != -1)].unit_id.values
    units.loc[own, 'hownrent'] = 1
    units.loc[rent, 'hownrent'] = 2

    # Status report. The denominators are zero when no household is placed
    # (or the units table is empty), so guard the divisions instead of letting
    # a log line raise ZeroDivisionError. Float arithmetic makes the rounding
    # effective on Python 2 as well.
    n_units = len(units)
    n_assigned = int(units.hownrent.notnull().sum())
    n_owned = int((units.hownrent == 1).sum())
    if n_assigned:
        pct_owned = int(round(100.0 * n_owned / n_assigned))
    else:
        pct_owned = 0
    if n_units:
        pct_unfilled = int(round(100.0 * (n_units - n_assigned) / n_units))
    else:
        pct_unfilled = 0
    print("Initial unit tenure assignment: %d%% owner occupied, %d%% unfilled" %
          (pct_owned, pct_unfilled))

    # Fill remaining units with random tenure assignment
    # TO DO: Make this weighted by existing allocation, rather than 50/50
    unfilled = units[units.hownrent.isnull()].index
    units.loc[unfilled, 'hownrent'] = np.random.randint(1, 3, len(unfilled))

    orca.add_table('residential_units', units)

    # Verify final data characteristics
    ot.assert_orca_spec(
        OrcaSpec(
            '',
            TableSpec(
                'residential_units',
                ColumnSpec('hownrent', min=1, max=2,
                           missing_val_coding=np.nan))))
    return
Ejemplo n.º 10
0
def ual_match_households_to_units(households, residential_units):
    """
    This initialization step adds a 'unit_id' to the households table and
    populates it based on existing assignments of households to buildings.
    This also allows us to add a 'vacant_units' count to the
    residential_units table.

    Within each building, households are numbered 0, 1, 2, ... and household
    number k is matched to the unit whose 'unit_num' is k.

    Data expectations
    -----------------
    - 'households' table has NO column 'unit_id'
    - 'households' table has column 'building_id' (int, '-1'-filled,
      corresponds to index of 'buildings' table)
    - 'residential_units' table has an index named 'unit_id' that serves as
      its id, and following columns:
        - 'building_id' (int, non-missing, corresponds to index of
          'buildings' table)
        - 'unit_num' (int, non-missing, unique within building)
        - 'num_units' (read by the computed 'vacant_units' column)

    Results
    -------
    - re-registers the 'households' table, sorted by 'building_id', with the
      following column added:
        - 'unit_id' ('-1'-filled, corresponds to index of 'residential_units'
          table)
      The helper column 'unit_num' created during matching is also left on
      the registered table.
    - adds following column to 'residential_units' table:
        - 'vacant_units' (int, 0 or 1, computed)
    - adds a broadcast linking 'households' to 'residential_units'
    """

    # Verify initial data characteristics
    ot.assert_orca_spec(
        OrcaSpec(
            '',
            TableSpec(
                'households', ColumnSpec('unit_id', registered=False),
                ColumnSpec('building_id',
                           foreign_key='buildings.building_id',
                           missing_val_coding=-1)),
            TableSpec(
                'residential_units', ColumnSpec('unit_id', primary_key=True),
                ColumnSpec('building_id',
                           foreign_key='buildings.building_id',
                           missing=False),
                ColumnSpec('unit_num', min=0, missing=False))))

    hh = households.to_frame(households.local_columns)
    units = residential_units.to_frame(['building_id', 'unit_num'])

    # This code block is from Fletcher
    # Lookup keyed by (building_id, unit_num); the former index becomes the
    # 'unit_id' column that is read from the lookup further down.
    unit_lookup = units.reset_index().set_index(['building_id', 'unit_num'])
    # Sort households by building so that the per-building counts below line
    # up positionally with the rows of 'hh'.
    hh = hh.sort_values(by=['building_id'], ascending=True)
    building_counts = hh.building_id.value_counts().sort_index()
    # Number the households 0..count-1 within each building. The assignment
    # is positional, so it relies on 'hh' being in the same building order as
    # 'building_counts'. Unplaced households (building_id == -1) get numbered
    # too, but their 'unit_id' is overwritten with -1 below.
    hh['unit_num'] = np.concatenate(
        [np.arange(i) for i in building_counts.values])
    unplaced = hh[hh.building_id == -1].index
    placed = hh[hh.building_id != -1].index
    # (building_id, unit_num) keys of the placed households, in row order
    indexes = [
        tuple(t) for t in hh.loc[placed, ['building_id', 'unit_num']].values
    ]
    # NOTE(review): a building holding more households than it has units
    # would produce keys that are absent from 'unit_lookup' -- confirm that
    # upstream data rules this out.
    hh.loc[placed, 'unit_id'] = unit_lookup.loc[indexes].unit_id.values
    hh.loc[unplaced, 'unit_id'] = -1
    orca.add_table('households', hh)

    @orca.column('residential_units', 'vacant_units')
    def vacant_units(residential_units, households):
        # 'num_units' minus the number of households recorded for each unit;
        # fill_value=0 covers units that no household refers to.
        return residential_units.num_units.sub(
            households.unit_id[households.unit_id != -1].value_counts(),
            fill_value=0)

    orca.broadcast('residential_units',
                   'households',
                   cast_index=True,
                   onto_on='unit_id')

    # Verify final data characteristics
    ot.assert_orca_spec(
        OrcaSpec(
            '',
            TableSpec(
                'households',
                ColumnSpec('unit_id',
                           foreign_key='residential_units.unit_id',
                           missing_val_coding=-1)),
            TableSpec('residential_units',
                      ColumnSpec('vacant_units', min=0, max=1))))
    return