Python groupby_random_choice Examples, utils.groupby_random_choice Python Examples

Example #1

0

Show file

File: models.py Project: yonran/bayarea_urbansim

def accessory_units(year, buildings, parcels):
    """
    Add the accessory units listed for `year` to residential buildings.

    The per-jurisdiction counts are read from the `str(year)` column of
    "data/accessory_units.csv" (indexed by "juris").  Buildings whose
    general_type is "Residential" are the candidates, keyed by the
    jurisdiction obtained from their parcel.  The "residential_units"
    column of `buildings.local` is incremented in place; nothing is
    returned.
    """
    schedule = pd.read_csv("data/accessory_units.csv", index_col="juris")
    units_to_add = schedule[str(year)]

    # presumably maps each building to the juris of its parcel
    juris_by_building = misc.reindex(parcels.juris, buildings.parcel_id)
    is_residential = buildings.general_type == "Residential"
    candidates = juris_by_building[is_residential]

    picks = groupby_random_choice(candidates, units_to_add)

    # the index of the picks identifies the buildings; an id that shows up
    # k times in the picks gets k additional units
    units_per_building = pd.Series(picks.index).value_counts()
    buildings.local.loc[units_per_building.index, "residential_units"] += \
        units_per_building.values

Example #2

0

Show file

File: models.py Project: ual/bayarea_urbansim

def accessory_units(year, buildings, parcels):
    """
    Add the accessory units listed for `year` to residential buildings.

    The per-jurisdiction counts are read from the `str(year)` column of
    "data/accessory_units.csv" (indexed by "juris").  Buildings whose
    general_type is "Residential" are the candidates, keyed by the
    jurisdiction obtained from their parcel.  The "residential_units"
    column of `buildings.local` is incremented in place; nothing is
    returned.
    """
    schedule = pd.read_csv("data/accessory_units.csv", index_col="juris")
    units_to_add = schedule[str(year)]

    # presumably maps each building to the juris of its parcel
    juris_by_building = misc.reindex(parcels.juris, buildings.parcel_id)
    is_residential = buildings.general_type == "Residential"
    candidates = juris_by_building[is_residential]

    picks = groupby_random_choice(candidates, units_to_add)

    # the index of the picks identifies the buildings; an id that shows up
    # k times in the picks gets k additional units
    units_per_building = pd.Series(picks.index).value_counts()
    buildings.local.loc[units_per_building.index, "residential_units"] += \
        units_per_building.values

Example #3

0

Show file

File: models.py Project: pksohn/bayarea_urbansim

def _proportional_jobs_model(
    target_ratio,  # ratio of jobs of this sector to households
    sector,        # empsix sector
    groupby_col,   # ratio will be matched at this level of geog
    hh_df,
    jobs_df,
    locations_series,
    target_jobs=None  # precomputed targets; skips the ratio computation
):
    """
    Assign unplaced jobs of one sector to locations so that each geography
    moves toward its target job count.

    When `target_jobs` is not given it is computed as the household count
    per `groupby_col` value times `target_ratio`, truncated to int.  Jobs
    of `sector` with building_id == -1 are the ones available to place.

    Returns a Series indexed by the index of the jobs that were placed,
    whose values are the index labels of the entries of `locations_series`
    chosen by groupby_random_choice.  An empty Series is returned when
    there are no available jobs or no unmet demand.
    """

    if target_jobs is None:
        # compute it if not passed
        target_jobs = hh_df[groupby_col].value_counts() * target_ratio
        target_jobs = target_jobs.astype('int')

    current_jobs = jobs_df[
        jobs_df.empsix == sector][groupby_col].value_counts()
    # a geography without any job of this sector is absent from
    # current_jobs; count it as zero, otherwise index alignment yields NaN
    # and the > 0 filter below silently discards its whole demand
    need_more_jobs = target_jobs.sub(current_jobs, fill_value=0)
    need_more_jobs = need_more_jobs[need_more_jobs > 0]
    need_more_jobs_total = int(need_more_jobs.sum())

    available_jobs = \
        jobs_df.query("empsix == '%s' and building_id == -1" % sector)

    # single-argument print calls produce the same text on Python 2 and 3
    print("Need more jobs total: %d" % need_more_jobs_total)
    print("Available jobs: %d" % len(available_jobs))

    if len(available_jobs) == 0:
        # corner case
        return pd.Series()

    if len(available_jobs) >= need_more_jobs_total:

        # have enough jobs to assign, truncate available jobs
        available_jobs = available_jobs.head(need_more_jobs_total)

    else:

        # don't have enough jobs - random sample locations to partially
        # match the need (won't succeed matching the entire need)
        need_more_jobs = round_series_match_target(
            need_more_jobs, len(available_jobs), 0)
        need_more_jobs_total = need_more_jobs.sum()

    assert need_more_jobs_total == len(available_jobs)

    if need_more_jobs_total <= 0:
        return pd.Series()

    print("Need more jobs\n%s" % (need_more_jobs,))

    # demand in a geography that has no entry in locations_series cannot be
    # placed anywhere, so drop it before sampling
    missing = need_more_jobs.index.difference(locations_series.unique())
    if len(missing) > 0:
        print("We don't have any locations for these locations:\n%s" %
              (missing,))
        need_more_jobs = need_more_jobs.drop(missing)

    # choose random locations within jurises to match need_more_jobs totals
    choices = groupby_random_choice(locations_series, need_more_jobs)

    # after dropping demand there can be fewer choices than jobs; keep the
    # two the same length so the result Series can be built
    available_jobs = available_jobs.head(len(choices))

    return pd.Series(choices.index, available_jobs.index)

Example #4

0

Show file

File: variables.py Project: pksohn/bayarea_urbansim

def vacant_market_rate_units_minus_structural_vacancy(buildings,
                                                      baseyear_taz_controls):
    """
    Return vacant market rate units per building after holding back the
    units that must stay vacant because of the structural vacancy rate.

    The required vacancies of a zone are its total residential units times
    `baseyear_taz_controls.target_ltvacancy`, truncated to int and capped
    at the number of vacant market rate units the zone actually has.  That
    many vacant units are then drawn per zone (without replacement) and
    subtracted from the buildings they belong to.
    """
    # required vacancies per zone = residential units * vacancy rate
    units_per_zone = \
        buildings.residential_units.groupby(buildings.zone_id).sum()
    vacancy_rate = baseyear_taz_controls.target_ltvacancy
    required = (units_per_zone * vacancy_rate).astype("int")

    # one entry per vacant unit: the value is the zone id and the index
    # label is repeated from the buildings index
    zone_ids = buildings.zone_id
    vacant_counts = buildings.vacant_market_rate_units.astype("int")
    zone_of_vacant_unit = zone_ids.repeat(vacant_counts)

    # can't require more vacant units than a zone has; zones without any
    # vacant unit count as zero
    available = zone_of_vacant_unit.value_counts()
    available = available.reindex(required.index).fillna(0)
    required = required.clip(upper=available).astype('int')

    # draw the units that are withheld from the choice and left vacant
    held_back = groupby_random_choice(zone_of_vacant_unit,
                                      required,
                                      replace=False)

    # the index of the draw carries the building ids, so tally them
    held_back_per_building = pd.Series(held_back.index).value_counts()

    # buildings with nothing held back keep their full vacant count
    return buildings.vacant_market_rate_units.sub(held_back_per_building,
                                                  fill_value=0)

Example #5

0

Show file

File: models.py Project: yonran/bayarea_urbansim

def _proportional_jobs_model(
    target_ratio,  # ratio of jobs of this sector to households
    sector,        # empsix sector
    groupby_col,   # ratio will be matched at this level of geog
    hh_df,
    jobs_df,
    locations_series,
    target_jobs=None  # precomputed targets; skips the ratio computation
):
    """
    Assign unplaced jobs of one sector to locations so that each geography
    moves toward its target job count.

    When `target_jobs` is not given it is computed as the household count
    per `groupby_col` value times `target_ratio`, truncated to int.  Jobs
    of `sector` with building_id == -1 are the ones available to place.

    Returns a Series indexed by the index of the jobs that were placed,
    whose values are the index labels of the entries of `locations_series`
    chosen by groupby_random_choice.  An empty Series is returned when
    there are no available jobs or no unmet demand.
    """

    if target_jobs is None:
        # compute it if not passed
        target_jobs = hh_df[groupby_col].value_counts() * target_ratio
        target_jobs = target_jobs.astype('int')

    current_jobs = jobs_df[
        jobs_df.empsix == sector][groupby_col].value_counts()
    # a geography without any job of this sector is absent from
    # current_jobs; count it as zero, otherwise index alignment yields NaN
    # and the > 0 filter below silently discards its whole demand
    need_more_jobs = target_jobs.sub(current_jobs, fill_value=0)
    need_more_jobs = need_more_jobs[need_more_jobs > 0]
    need_more_jobs_total = int(need_more_jobs.sum())

    available_jobs = \
        jobs_df.query("empsix == '%s' and building_id == -1" % sector)

    # single-argument print calls produce the same text on Python 2 and 3
    print("Need more jobs total: %d" % need_more_jobs_total)
    print("Available jobs: %d" % len(available_jobs))

    if len(available_jobs) == 0:
        # corner case
        return pd.Series()

    if len(available_jobs) >= need_more_jobs_total:

        # have enough jobs to assign, truncate available jobs
        available_jobs = available_jobs.head(need_more_jobs_total)

    else:

        # don't have enough jobs - random sample locations to partially
        # match the need (won't succeed matching the entire need)
        need_more_jobs = round_series_match_target(
            need_more_jobs, len(available_jobs), 0)
        need_more_jobs_total = need_more_jobs.sum()

    assert need_more_jobs_total == len(available_jobs)

    if need_more_jobs_total <= 0:
        return pd.Series()

    print("Need more jobs\n%s" % (need_more_jobs,))

    # diagnostic only: demand that exceeds the number of candidate locations
    excess = need_more_jobs.sub(locations_series.value_counts(), fill_value=0)
    print("Excess demand\n%s" % (excess[excess > 0],))

    # there's an issue with groupby_random_choice where it can't choose from
    # a set of locations that don't exist - e.g. we have 2 jobs in a certain
    # city but not locations to put them in.  we need to drop this demand
    drop = need_more_jobs.index.difference(locations_series.unique())
    print("We don't have any locations for these locations:\n%s" % (drop,))
    need_more_jobs = need_more_jobs.drop(drop)

    # choose random locations within jurises to match need_more_jobs totals
    choices = groupby_random_choice(locations_series, need_more_jobs,
                                    replace=True)

    # these might not be the same length after dropping a few lines above
    available_jobs = available_jobs.head(len(choices))

    return pd.Series(choices.index, available_jobs.index)

Example #6

0

Show file

File: models.py Project: ual/bayarea_urbansim

def _proportional_jobs_model(
    target_ratio,  # ratio of jobs of this sector to households
    sector,        # empsix sector
    groupby_col,   # ratio will be matched at this level of geog
    hh_df,
    jobs_df,
    locations_series,
    target_jobs=None  # precomputed targets; skips the ratio computation
):
    """
    Assign unplaced jobs of one sector to locations so that each geography
    moves toward its target job count.

    When `target_jobs` is not given it is computed as the household count
    per `groupby_col` value times `target_ratio`, truncated to int.  Jobs
    of `sector` with building_id == -1 are the ones available to place.

    Returns a Series indexed by the index of the jobs that were placed,
    whose values are the index labels of the entries of `locations_series`
    chosen by groupby_random_choice.  An empty Series is returned when
    there are no available jobs or no unmet demand.
    """

    if target_jobs is None:
        # compute it if not passed
        target_jobs = hh_df[groupby_col].value_counts() * target_ratio
        target_jobs = target_jobs.astype('int')

    current_jobs = jobs_df[
        jobs_df.empsix == sector][groupby_col].value_counts()
    # a geography without any job of this sector is absent from
    # current_jobs; count it as zero, otherwise index alignment yields NaN
    # and the > 0 filter below silently discards its whole demand
    need_more_jobs = target_jobs.sub(current_jobs, fill_value=0)
    need_more_jobs = need_more_jobs[need_more_jobs > 0]
    need_more_jobs_total = int(need_more_jobs.sum())

    available_jobs = \
        jobs_df.query("empsix == '%s' and building_id == -1" % sector)

    # single-argument print calls produce the same text on Python 2 and 3
    print("Need more jobs total: %d" % need_more_jobs_total)
    print("Available jobs: %d" % len(available_jobs))

    if len(available_jobs) == 0:
        # corner case
        return pd.Series()

    if len(available_jobs) >= need_more_jobs_total:

        # have enough jobs to assign, truncate available jobs
        available_jobs = available_jobs.head(need_more_jobs_total)

    else:

        # don't have enough jobs - random sample locations to partially
        # match the need (won't succeed matching the entire need)
        need_more_jobs = round_series_match_target(
            need_more_jobs, len(available_jobs), 0)
        need_more_jobs_total = need_more_jobs.sum()

    assert need_more_jobs_total == len(available_jobs)

    if need_more_jobs_total <= 0:
        return pd.Series()

    print("Need more jobs\n%s" % (need_more_jobs,))

    # diagnostic only: demand that exceeds the number of candidate locations
    excess = need_more_jobs.sub(locations_series.value_counts(), fill_value=0)
    print("Excess demand\n%s" % (excess[excess > 0],))

    # there's an issue with groupby_random_choice where it can't choose from
    # a set of locations that don't exist - e.g. we have 2 jobs in a certain
    # city but not locations to put them in.  we need to drop this demand
    drop = need_more_jobs.index.difference(locations_series.unique())
    print("We don't have any locations for these locations:\n%s" % (drop,))
    need_more_jobs = need_more_jobs.drop(drop)

    # choose random locations within jurises to match need_more_jobs totals
    choices = groupby_random_choice(locations_series, need_more_jobs,
                                    replace=True)

    # these might not be the same length after dropping a few lines above
    available_jobs = available_jobs.head(len(choices))

    return pd.Series(choices.index, available_jobs.index)