Exemple #1
0
def gen_probs(dset, movers, agents_groupby, alts, output_names):
    """Compute per-segment choice probabilities over ``alts``.

    ``movers`` is grouped by ``agents_groupby``.  For every group the first
    row is used as the representative chooser: it is merged onto every row
    of ``alts``, the explanatory variables named in the segment's
    coefficient table are assembled into a matrix, and that matrix is handed
    to ``interaction.mnl_simulate`` with ``returnprobs=1``.

    Parameters
    ----------
    dset : object
        Must expose ``coeffs`` (indexable by ``(coeff_name, 'fnames')``) and
        ``load_coeff(coeff_name)``.
    movers : pandas.DataFrame
        Agents to segment.
    agents_groupby : str or list of str
        Column(s) of ``movers`` passed to ``groupby``.
    alts : pandas.DataFrame
        Alternatives; one probability is produced per row.  Not modified.
    output_names : sequence of 4 str
        ``(output_csv, output_title, coeff_name, output_varname)``.  Only
        ``coeff_name`` is used here; it is %-formatted with the segment name.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``alts``, with one ``'segment<name>'`` column per group.
    """
    # Unpacked in full so a malformed ``output_names`` still fails loudly.
    output_csv, output_title, coeff_name, output_varname = output_names
    pdf = pd.DataFrame(index=alts.index)
    num_alts = alts.index.size

    for name, segment in movers.groupby(agents_groupby):
        # One representative agent per segment.
        segment = segment.head(1)
        name = str(name)
        tmp_coeffname = coeff_name % name

        # Variable names for this segment, with the null padding rows dropped.
        fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
        ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()

        # Work on a copy so the caller's ``alts`` does not silently gain a
        # 'join_index' column.
        alts_sample = alts.copy()
        alts_sample['join_index'] = np.repeat(segment.index.values, num_alts)
        alternative_sample = pd.merge(alts_sample,
                                      segment,
                                      left_on='join_index',
                                      right_index=True,
                                      suffixes=('', '_r'))

        # Interaction variables are named '<left>_x_<right>'.  A left name
        # ending in 'gt' / 'lt' means a 0/1 comparison indicator; anything
        # else is a plain product.
        # NOTE(review): the previous `if gt / if lt / else` chain let the
        # product overwrite the 'gt' indicator.  If the estimation code builds
        # these columns the same way, fix it in step so the coefficients and
        # the simulated variables agree.
        for var in ind_vars:
            if '_x_' not in var:
                continue
            parts = var.split('_x_')
            left = alternative_sample[parts[0]]
            right = alternative_sample[parts[1]]
            if parts[0].endswith('gt'):
                alternative_sample[var] = (left > right).astype('int32')
            elif parts[0].endswith('lt'):
                alternative_sample[var] = (left < right).astype('int32')
            else:
                alternative_sample[var] = left * right

        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        # ``.values`` is the same array ``as_matrix()`` returned.
        data = est_data.fillna(0).values

        coeff = dset.load_coeff(tmp_coeffname)
        probs = interaction.mnl_simulate(data,
                                         coeff,
                                         numalts=num_alts,
                                         returnprobs=1)
        pdf['segment%s' % name] = pd.Series(probs.flatten(), index=alts.index)

    return pdf
def gen_probs(dset, movers, agents_groupby, alts, output_names):
    """Return a table of multinomial-logit choice probabilities per segment.

    Groups ``movers`` by ``agents_groupby`` and, for each group, takes its
    first row as the chooser, joins it to every alternative in ``alts``,
    builds the variables listed in the segment's coefficient table and calls
    ``interaction.mnl_simulate(..., returnprobs=1)``.

    Parameters
    ----------
    dset : object
        Provides ``coeffs[(coeff_name, 'fnames')]`` and
        ``load_coeff(coeff_name)``.
    movers : pandas.DataFrame
        Agents to be segmented.
    agents_groupby : str or list of str
        Grouping column(s) in ``movers``.
    alts : pandas.DataFrame
        Candidate alternatives.  Left untouched by this function.
    output_names : sequence of 4 str
        ``(output_csv, output_title, coeff_name, output_varname)``; only
        ``coeff_name`` is read, and it is %-formatted with the segment name.

    Returns
    -------
    pandas.DataFrame
        Same index as ``alts``; one column ``'segment<name>'`` per group.
    """
    # Full unpack kept so a wrong-length ``output_names`` raises immediately.
    output_csv, output_title, coeff_name, output_varname = output_names
    pdf = pd.DataFrame(index=alts.index)
    num_alts = alts.index.size

    for name, segment in movers.groupby(agents_groupby):
        segment = segment.head(1)  # representative agent for the segment
        name = str(name)
        tmp_coeffname = coeff_name % name

        # Non-null entries of the 'fnames' column are the variable names.
        fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
        ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()

        # Copy first: assigning 'join_index' straight onto ``alts`` would
        # leak a helper column into the caller's DataFrame.
        alts_sample = alts.copy()
        alts_sample['join_index'] = np.repeat(segment.index.values, num_alts)
        alternative_sample = pd.merge(alts_sample, segment,
                                      left_on='join_index', right_index=True,
                                      suffixes=('', '_r'))

        # '<left>_x_<right>' variables: '...gt' / '...lt' on the left side
        # select a 0/1 comparison, otherwise the two columns are multiplied.
        # NOTE(review): this used to be `if gt / if lt / else`, so the 'gt'
        # indicator was always overwritten by the product.  Check that the
        # estimation code constructs these variables the same way.
        for var in ind_vars:
            if '_x_' not in var:
                continue
            parts = var.split('_x_')
            left = alternative_sample[parts[0]]
            right = alternative_sample[parts[1]]
            if parts[0].endswith('gt'):
                alternative_sample[var] = (left > right).astype('int32')
            elif parts[0].endswith('lt'):
                alternative_sample[var] = (left < right).astype('int32')
            else:
                alternative_sample[var] = left * right

        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        data = est_data.fillna(0).values  # same array as_matrix() produced

        coeff = dset.load_coeff(tmp_coeffname)
        probs = interaction.mnl_simulate(data, coeff, numalts=num_alts, returnprobs=1)
        pdf['segment%s' % name] = pd.Series(probs.flatten(), index=alts.index)

    return pdf
Exemple #3
0
def simulate(dset,
             year,
             depvar='building_id',
             alternatives=None,
             simulation_table=None,
             output_names=None,
             agents_groupby=[
                 'income_3_tenure',
             ],
             transition_config=None,
             relocation_config=None):
    """Assign unplaced and relocating households to buildings.

    Steps, in order:

    1. If ``transition_config['Enabled']``: scale the household control
       totals, run ``transition.TransitionModel`` on households (with persons
       as a linked table), set ``building_id`` to -1 for the added rows and
       store both tables back into ``dset.d``.
    2. Add an ``est_mortgage_payment`` column to ``alternatives``.
    3. If ``relocation_config['Enabled']``: set ``depvar`` to -1 for the rows
       returned by ``dset.relocation_rates``.
    4. Draw a county for every mover (``depvar == -1``) using the ``year`` row
       of ``county_growth_share.csv`` as probabilities.
    5. For every (county, income group) pair: select vacant units in the
       county that pass a price rule, compute MNL probabilities per
       ``agents_groupby`` segment, draw units without replacement and write
       the chosen building ids into ``dset.households``.

    Parameters
    ----------
    dset : object
        Project dataset; uses ``fetch``, ``d``, ``store``, ``households``,
        ``coeffs``, ``load_coeff`` and ``relocation_rates``.
    year : int
        Row label looked up in the control totals / growth share tables.
    depvar : str
        Column holding the chosen building; -1 marks an unplaced agent.
    alternatives : pandas.DataFrame
        Buildings.  Modified in place (``est_mortgage_payment`` is added).
    simulation_table : str
        Name passed to ``dset.fetch`` to obtain the choosers.
    output_names : sequence of 4 str
        ``(output_csv, output_title, coeff_name, output_varname)``; only
        ``coeff_name`` (%-formatted with the segment name) is used.
    agents_groupby : list of str
        Columns used to split movers into model segments.  The default list
        is never mutated here.
    transition_config, relocation_config : dict
        Each must contain ``'Enabled'`` plus the keys read below.

    Returns
    -------
    None
        Results are written into ``dset.households``.
    """
    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        ct["total_number_of_households"] = (
            ct["total_number_of_households"] *
            transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(
            ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        new, added, new_linked = model.transition(
            hh, year, linked_tables={'linked': (persons, 'household_id')})
        # Newly created households start unplaced.
        new.loc[added, 'building_id'] = -1
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']

    # NOTE(review): temp_count is never incremented, so the `> 50` break at
    # the bottom of the segment loop can never fire.
    temp_count = 0

    buildings = alternatives
    out_table = pd.DataFrame(columns=dset.households.columns)
    homeless = pd.DataFrame(columns=dset.households.columns)

    # Mortgage payment: level-payment amortisation factor with a periodic
    # rate of 0.05/12 over 360 periods, applied to the unit price.
    r = .05 / 12
    n = 360
    buildings['est_mortgage_payment'] = buildings.unit_price_residential * (
        (r * (1 + r)**n) / ((1 + r)**n - 1))

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    if relocation_config['Enabled']:
        rate_table = dset.store[
            relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        rate_table[rate_field] = rate_table[
            rate_field] * .01 * relocation_config['scaling_factor']
        movers = dset.relocation_rates(choosers, rate_table, rate_field)
        choosers[depvar].ix[movers] = -1

    movers_all = choosers[choosers[depvar] == -1]
    county_growth_share = pd.read_csv(os.path.join(misc.data_dir(),
                                                   'county_growth_share.csv'),
                                      index_col=0)
    counties = county_growth_share.columns.values
    current_growth_shares = county_growth_share.loc[year].values
    movers_counties = np.random.choice(counties,
                                       movers_all.shape[0],
                                       replace=True,
                                       p=current_growth_shares)

    movers_all['county_id'] = movers_counties

    # Columns after agg, by position: 0 = upper mean, 1 = upper size,
    # 2 = lower mean, 3 = lower size.  The positional seg[1][k] lookups
    # below rely on this layout.
    income_segment = movers_all.groupby('income_grp')[
        'upper_income_grp_val', 'lower_income_grp_val'].agg([np.mean, np.size])

    # Replicate the income table once per county, tagging each copy with the
    # county label and its growth share for `year`.
    income_segment['county'] = county_growth_share.loc[year].index.values[0]
    income_segment['growth_share'] = county_growth_share.loc[year][0]
    copy_df = income_segment.copy()
    for i in county_growth_share.loc[year][1:].iteritems():

        copy_df['county'] = i[0]
        copy_df['growth_share'] = i[1]
        income_segment = pd.concat([income_segment, copy_df])

    income_segment = income_segment.set_index(['county', income_segment.index])

    print("Total new agents and movers = %d" % len(movers_all.index))

    for seg in income_segment.iterrows():
        # seg[0] is the (county, income_grp) index pair; seg[1] is the row.

        # NOTE(review): movers are filtered by income only, not by the
        # county_id drawn above -- confirm whether that is intended.
        movers = movers_all[(movers_all['income'] <= seg[1][0])
                            & (movers_all['income'] >= seg[1][2])]
        print('County: %s. Placing %d households in the income range (%d, %d)' % (
            seg[0][0], seg[1][1], seg[1][2], seg[1][0]))

        # Vacant units = residential units minus households already placed.
        empty_units = buildings.residential_units.sub(choosers[
            choosers['building_id'] != -1].groupby('building_id').size(),
                                                      fill_value=0)
        empty_units = empty_units[empty_units > 0].order(ascending=False)
        print('number of empty units is %d' % empty_units.sum())
        # One row per vacant unit (buildings repeated by their vacancy).
        alternatives = buildings.ix[np.repeat(
            empty_units.index.values, empty_units.values.astype('int'))]
        alternatives = alternatives[alternatives.county_id == int(seg[0][0])]

        # Price rule keyed on the segment's lower income bound divided by 12.
        if ((seg[1][2] / 12) <= 0):
            alts = alternatives[
                alternatives['unit_price_residential'] < 186281]
        elif ((seg[1][2] / 12) >= 55000):
            alts = alternatives[
                alternatives['unit_price_residential'] > 1583400]
        else:
            alts = alternatives[alternatives['est_mortgage_payment'] /
                                (seg[1][2] / 12) <= 0.33]
        if (alts.shape[0] == 0):
            # NOTE(review): this appends every chooser, not just this
            # segment's movers, and `homeless` is never read afterwards.
            homeless = pd.concat([choosers, homeless])
            print('Could not place %d households due to income restrictions' % seg[
                1][1])
            continue

        pdf = pd.DataFrame(index=alts.index)

        segments = movers.groupby(agents_groupby)

        ##simulation
        for name, segment in segments:
            # One representative agent per model segment.
            segment = segment.head(1)
            name = str(name)
            tmp_coeffname = coeff_name % name
            ind_vars = dset.coeffs[(tmp_coeffname, 'fnames')][np.invert(
                dset.coeffs[(tmp_coeffname,
                             'fnames')].isnull().values)].values.tolist()
            SAMPLE_SIZE = alts.index.size
            alts_sample = alts
            alts_sample['join_index'] = np.repeat(segment.index.values,
                                                  SAMPLE_SIZE)
            alternative_sample = pd.merge(alts_sample,
                                          segment,
                                          left_on='join_index',
                                          right_index=True,
                                          suffixes=('', '_r'))
            ##Interaction variables
            # '<left>_x_<right>': 'gt'/'lt' suffix on the left name selects a
            # 0/1 comparison, otherwise the product.  This was previously
            # `if gt / if lt / else`, which overwrote the 'gt' indicator with
            # the product.
            # NOTE(review): keep the estimation code consistent with this.
            interaction_vars = [(var, var.split('_x_')) for var in ind_vars
                                if '_x_' in var]
            for ivar in interaction_vars:
                if ivar[1][0].endswith('gt'):
                    alternative_sample[ivar[0]] = (
                        (alternative_sample[ivar[1][0]]) >
                        alternative_sample[ivar[1][1]]).astype('int32')
                elif ivar[1][0].endswith('lt'):
                    alternative_sample[ivar[0]] = (
                        (alternative_sample[ivar[1][0]]) <
                        alternative_sample[ivar[1][1]]).astype('int32')
                else:
                    alternative_sample[ivar[0]] = (
                        (alternative_sample[ivar[1][0]]) *
                        alternative_sample[ivar[1][1]])

            est_data = pd.DataFrame(index=alternative_sample.index)
            for varname in ind_vars:
                est_data[varname] = alternative_sample[varname]
            est_data = est_data.fillna(0)
            data = est_data.values  # same array as_matrix() returned
            coeff = dset.load_coeff(tmp_coeffname)
            probs = interaction.mnl_simulate(data,
                                             coeff,
                                             numalts=SAMPLE_SIZE,
                                             returnprobs=1)
            pdf['segment%s' % name] = pd.Series(probs.flatten(),
                                                index=alts.index)

        # -1 = not yet placed.
        new_homes = pd.Series(np.ones(len(movers.index)) * -1,
                              index=movers.index)
        for name, segment in segments:
            name_coeff = str(name)
            name = str(name)
            p = pdf['segment%s' % name]
            mask = np.zeros(len(alts.index), dtype='bool')
            mask = pd.Series(mask, index=alts.index)

            print("Assigning units to %d agents of segment %s" % (len(
                segment.index), name))

            def choose(p,
                       mask,
                       alternatives,
                       segment,
                       new_homes,
                       minsize=None):
                """Draw one alternative per agent in ``segment`` without
                replacement, weighted by ``p``; returns (mask, new_homes)."""
                p = copy.copy(p)
                p.loc[mask[mask == True].index] = 0  # already chosen
                try:
                    indexes = np.random.choice(alternatives.index.values,
                                               len(segment.index),
                                               replace=False,
                                               p=p.values / p.values.sum())
                except ValueError:
                    # np.random.choice raises ValueError when it cannot draw
                    # that many distinct items (or p is unusable).
                    print("WARNING: not enough options to fit agents, will result in unplaced agents")
                    indexes = np.random.choice(alternatives.index.values,
                                               len(alternatives.index.values),
                                               replace=False,
                                               p=p.values / p.values.sum())

                    # If the truncated agent list contains tenure == 2 rows,
                    # flag those with -2; otherwise hand out the alternatives
                    # in order to the first agents.
                    if (new_homes.ix[segment[segment.tenure == 2].index.
                                     values[:len(alternatives.index.values)]].
                            shape[0] != 0):
                        new_homes.ix[
                            segment[segment.tenure == 2].index.
                            values[:len(alternatives.index.values)]] = -2
                    else:
                        new_homes.ix[segment.index.values[:len(
                            alternatives.index.values
                        )]] = alternatives.index.values

                    mask.loc[indexes] = True
                    return mask, new_homes

                new_homes.ix[segment.index] = alternatives.loc[
                    indexes].index.values[:len(new_homes.ix[segment.index])]
                mask.loc[indexes] = True

                return mask, new_homes

            mask, new_homes = choose(p, mask, alts, segment, new_homes)

        build_cnts = new_homes.value_counts(
        )  #num households place in each building
        print("Assigned %d agents to %d locations with %d unplaced" % (
            new_homes.size, build_cnts.size, build_cnts.get(-1, 0)))

        table = dset.households  # need to go back to the whole dataset
        table[depvar].ix[new_homes.index] = new_homes.values.astype('int32')
        table = table.ix[new_homes.index]
        out_table = pd.concat([table, out_table])
        # Feed placements back so the next segment sees updated vacancy.
        choosers.loc[table.index] = table
        if (temp_count > 50):
            break
    dset.households.loc[out_table.index] = out_table
def simulate(
    dset,
    year,
    depvar="building_id",
    alternatives=None,
    simulation_table="establishments",
    output_names=None,
    agents_groupby=["income_3_tenure"],
    transition_config=None,
):
    """Assign unplaced establishments to buildings with free job space.

    Steps, in order:

    1. If ``transition_config["Enabled"]``: scale the job control totals and
       run ``transitionmodel.simulate`` on ``dset.establishments``.
    2. Take establishments with ``depvar == -1`` as movers.
    3. Compute job ``spaces`` per building (non-residential sqft divided by
       sqft per job, missing values filled with 1000) and subtract the
       employees already placed to get the free ``supply``.
    4. Compute MNL probabilities over buildings for each ``agents_groupby``
       segment, then, within each segment, place establishments from the
       largest employee count downwards, reducing ``supply`` as they go.
    5. Write the chosen buildings into ``dset.establishments``, refresh its
       ``zone_id`` from ``dset.buildings`` and store ``depvar`` via
       ``dset.store_attr``.

    Several ``print`` calls reference hard-coded ids (zone 1834, index
    472137); they look like debugging output and are kept unchanged.

    Parameters
    ----------
    dset : object
        Project dataset; uses ``fetch``, ``establishments``, ``buildings``,
        ``parcels``, ``coeffs``, ``load_coeff`` and ``store_attr``.
    year : int
        Simulation year passed to the transition model and ``store_attr``.
    depvar : str
        Column holding the chosen building; -1 marks an unplaced agent.
    alternatives : pandas.DataFrame
        Buildings.  Modified in place (``building_sqft_per_job`` is filled and
        ``spaces`` is added).
    simulation_table : str
        Name passed to ``dset.fetch`` to obtain the choosers.
    output_names : sequence of 4 str
        ``(output_csv, output_title, coeff_name, output_varname)``;
        ``coeff_name`` is %-formatted with the segment name and
        ``output_varname`` is passed to ``dset.store_attr``.
    agents_groupby : list of str
        Columns used to split movers into model segments.  The default list
        is never mutated here.
    transition_config : dict
        Must contain ``"Enabled"`` plus the keys read below.

    Returns
    -------
    None
    """
    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config["Enabled"]:
        ct = dset.fetch(transition_config["control_totals_table"])
        ct["total_number_of_jobs"] = (ct["total_number_of_jobs"] * transition_config["scaling_factor"]).astype("int32")
        new_jobs = {
            "table": "dset.establishments",
            "writetotmp": "establishments",
            "model": "transitionmodel",
            "first_year": 2010,
            "control_totals": "dset.%s" % transition_config["control_totals_table"],
            "geography_field": "building_id",
            "amount_field": "total_number_of_jobs",
            "size_field": "employees",
        }
        import synthicity.urbansim.transitionmodel as transitionmodel

        transitionmodel.simulate(dset, new_jobs, year=year, show=True)

    dset.establishments.index.name = "establishment_id"
    choosers = dset.fetch(simulation_table)
    placed_choosers = choosers[choosers[depvar] > 0]

    movers = choosers[choosers[depvar] == -1]

    movers["zone_id"] = -1
    print("Total new agents and movers = %d" % len(movers.index))
    dset.establishments.loc[movers.index, "zone_id"] = -1
    print(dset.establishments[dset.establishments["zone_id"] == 1834].employees.sum())
    alternatives.building_sqft_per_job = alternatives.building_sqft_per_job.fillna(1000)

    alternatives.loc[:, "spaces"] = (
        alternatives.non_residential_sqft / alternatives.building_sqft_per_job
    )  # corrected chained indexing error

    # Free job spaces = capacity minus employees already placed per building.
    empty_units = alternatives.spaces.sub(placed_choosers.groupby("building_id").employees.sum(), fill_value=0).astype(
        "int"
    )
    empty_units = empty_units[empty_units > 0].order(ascending=False)
    print(empty_units[empty_units.index == 472137])

    alts = alternatives.ix[empty_units.index]
    alts["supply"] = empty_units
    print(movers[movers.employees > 4000])

    u = pd.DataFrame(empty_units)
    u.columns = ["empty"]
    u["building_id"] = u.index
    empty_units_test = pd.merge(u, alternatives[["zone_id"]], left_on="building_id", right_index=True)
    print(empty_units_test[empty_units_test.zone_id == 1834]["empty"].sum())
    pdf = pd.DataFrame(index=alts.index)

    segments = movers.groupby(agents_groupby)

    for name, segment in segments:
        # One representative agent per model segment.
        segment = segment.head(1)
        name = str(name)
        tmp_coeffname = coeff_name % name
        ind_vars = dset.coeffs[(tmp_coeffname, "fnames")][
            np.invert(dset.coeffs[(tmp_coeffname, "fnames")].isnull().values)
        ].values.tolist()
        SAMPLE_SIZE = alts.index.size
        alts_sample = alts
        alts_sample["join_index"] = np.repeat(segment.index.values, SAMPLE_SIZE)
        alternative_sample = pd.merge(
            alts_sample, segment, left_on="join_index", right_index=True, suffixes=("", "_r")
        )
        ##Interaction variables
        # '<left>_x_<right>': 'gt'/'lt' suffix on the left name selects a 0/1
        # comparison, otherwise the product.  This was previously
        # `if gt / if lt / else`, which overwrote the 'gt' indicator with the
        # product.
        # NOTE(review): keep the estimation code consistent with this.
        interaction_vars = [(var, var.split("_x_")) for var in ind_vars if "_x_" in var]
        for ivar in interaction_vars:
            if ivar[1][0].endswith("gt"):
                alternative_sample[ivar[0]] = (
                    (alternative_sample[ivar[1][0]]) > alternative_sample[ivar[1][1]]
                ).astype("int32")
            elif ivar[1][0].endswith("lt"):
                alternative_sample[ivar[0]] = (
                    (alternative_sample[ivar[1][0]]) < alternative_sample[ivar[1][1]]
                ).astype("int32")
            else:
                alternative_sample[ivar[0]] = (alternative_sample[ivar[1][0]]) * alternative_sample[ivar[1][1]]

        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        est_data = est_data.fillna(0)
        data = est_data.values  # same array as_matrix() returned
        coeff = dset.load_coeff(tmp_coeffname)
        probs = interaction.mnl_simulate(data, coeff, numalts=SAMPLE_SIZE, returnprobs=1)
        pdf["segment%s" % name] = pd.Series(probs.flatten(), index=alts.index)

    # -1 = not yet placed.
    new_homes = pd.Series(np.ones(len(movers.index)) * -1, index=movers.index)
    # `mask` is threaded through choose() but never modified by it.
    mask = np.zeros(len(alts.index), dtype="bool")

    for name, segment in segments:
        name = str(name)
        print("Assigning units to %d agents of segment %s" % (len(segment.index), name))
        p = pdf["segment%s" % name].values

        def choose(p, mask, alternatives, segment, new_homes, minsize=None):
            """Draw one building per agent in ``segment`` without replacement
            among buildings whose supply is at least ``minsize``."""
            p = copy.copy(p)
            pu = pd.DataFrame(p, index=alternatives.index)
            pu.columns = ["pro"]
            # Buildings too small for this establishment size get weight 0.
            pu.loc[alternatives.supply < minsize, "pro"] = 0

            pp = np.array(pu).flatten()
            try:
                indexes = np.random.choice(len(alternatives.index), len(segment.index), replace=False, p=pp / pp.sum())
            except ValueError:
                # np.random.choice raises ValueError when there are too few
                # non-zero weights (or p is unusable); leave agents at -1.
                print("WARNING: not enough options to fit agents, will result in unplaced agents")
                return mask, new_homes
            chosen_ids = alternatives.index.values[indexes]
            new_homes.ix[segment.index] = chosen_ids
            # Single .loc update instead of chained `["supply"].ix[...] -=`,
            # which may write to a temporary copy.
            alternatives.loc[chosen_ids, "supply"] -= minsize
            return mask, new_homes

        tmp = segment["employees"]
        # groupby yields keys in ascending order; reversed() places the
        # largest establishments first.
        for n_employees, subsegment in reversed(list(segment.groupby(tmp.astype("int")))):
            mask, new_homes = choose(p, mask, alts, subsegment, new_homes, minsize=int(n_employees))

    build_cnts = new_homes.value_counts()  # num estabs place in each building
    print("Assigned %d agents to %d locations with %d unplaced" % (
        new_homes.size,
        build_cnts.size,
        build_cnts.get(-1, 0),
    ))

    # NOTE(review): `p` is not used after this point.
    p = dset.parcels
    p = p.set_index("parcel_id")

    del dset.establishments["zone_id"]
    dset.establishments["zone_id"] = pd.merge(
        dset.establishments, dset.buildings[["zone_id"]], left_on="building_id", right_index=True
    )["zone_id"]

    print(dset.establishments[dset.establishments["zone_id"] == 1834].employees.sum())
    placed_choosers = choosers[choosers[depvar] > 0]
    empty_units = alternatives.spaces.sub(placed_choosers.groupby("building_id").employees.sum(), fill_value=0).astype(
        "int"
    )

    table = dset.establishments  # need to go back to the whole dataset
    table[depvar].ix[new_homes.index] = new_homes.values.astype("int32")
    # Refresh zone_id now that building ids have changed.
    del table["zone_id"]
    table["zone_id"] = pd.merge(
        dset.establishments, dset.buildings[["zone_id"]], left_on="building_id", right_index=True
    )["zone_id"]
    print(table.groupby("zone_id").employees.sum().loc[1834])

    table["space"] = 0

    alternatives.loc[:, "spaces"] = alternatives.non_residential_sqft / alternatives.building_sqft_per_job
    empty_units = alternatives.spaces.sub(
        table[table["building_id"] > 0].groupby("building_id").employees.sum(), fill_value=0
    ).astype("int")
    empty_units = empty_units[empty_units > 0].order(ascending=False)
    u = pd.DataFrame(empty_units)
    u.columns = ["empty"]
    u["building_id"] = u.index
    empty_units_test = pd.merge(u, alternatives[["zone_id"]], left_on="building_id", right_index=True)
    print(empty_units_test[empty_units_test.zone_id == 1834]["empty"].sum())
    print(table[table.index == 472137])

    dset.store_attr(output_varname, year, copy.deepcopy(table[depvar]))
def simulate(dset,
             year,
             depvar='building_id',
             alternatives=None,
             simulation_table='establishments',
             output_names=None,
             agents_groupby=[
                 'income_3_tenure',
             ],
             transition_config=None):
    """Assign unplaced establishments to buildings with free job space.

    Steps, in order:

    1. If ``transition_config['Enabled']``: scale the job control totals and
       run ``transitionmodel.simulate`` on ``dset.establishments``.
    2. Take establishments with ``depvar == -1`` as movers.
    3. Compute job ``spaces`` per building (non-residential sqft divided by
       sqft per job, missing values filled with 1000) and subtract the
       employees already placed to get the free ``supply``.
    4. Compute MNL probabilities over buildings for each ``agents_groupby``
       segment, then, within each segment, place establishments from the
       largest employee count downwards, reducing ``supply`` as they go.
    5. Write the chosen ``building_id`` and matching ``zone_id`` into
       ``dset.establishments`` and store ``depvar`` via ``dset.store_attr``.

    Parameters
    ----------
    dset : object
        Project dataset; uses ``fetch``, ``establishments``, ``buildings``,
        ``coeffs``, ``load_coeff`` and ``store_attr``.
    year : int
        Simulation year passed to the transition model and ``store_attr``.
    depvar : str
        Column holding the chosen building; -1 marks an unplaced agent.
    alternatives : pandas.DataFrame
        Buildings.  Modified in place (``building_sqft_per_job`` is filled and
        ``spaces`` is added).
    simulation_table : str
        Name passed to ``dset.fetch`` to obtain the choosers.
    output_names : sequence of 4 str
        ``(output_csv, output_title, coeff_name, output_varname)``;
        ``coeff_name`` is %-formatted with the segment name and
        ``output_varname`` is passed to ``dset.store_attr``.
    agents_groupby : list of str
        Columns used to split movers into model segments.  The default list
        is never mutated here.
    transition_config : dict
        Must contain ``'Enabled'`` plus the keys read below.

    Returns
    -------
    None
    """
    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        ct["total_number_of_jobs"] = (
            ct["total_number_of_jobs"] *
            transition_config['scaling_factor']).astype('int32')
        new_jobs = {
            "table": "dset.establishments",
            "writetotmp": "establishments",
            "model": "transitionmodel",
            "first_year": 2010,
            "control_totals":
            "dset.%s" % transition_config['control_totals_table'],
            "geography_field": "building_id",
            "amount_field": "total_number_of_jobs",
            "size_field": "employees"
        }
        import synthicity.urbansim.transitionmodel as transitionmodel
        transitionmodel.simulate(dset, new_jobs, year=year, show=True)

    dset.establishments.index.name = 'establishment_id'
    choosers = dset.fetch(simulation_table)
    placed_choosers = choosers[choosers[depvar] > 0]

    movers = choosers[choosers[depvar] == -1]
    movers.loc[:, "zone_id"] = -1
    print("Total new agents and movers = %d" % len(movers.index))
    alternatives.building_sqft_per_job = alternatives.building_sqft_per_job.fillna(
        1000)
    alternatives.loc[:,
                     'spaces'] = alternatives.non_residential_sqft / alternatives.building_sqft_per_job  # corrected chained indexing error
    # Free job spaces = capacity minus employees already placed per building.
    empty_units = alternatives.spaces.sub(
        placed_choosers.groupby('building_id').employees.sum(),
        fill_value=0).astype('int')
    empty_units = empty_units[empty_units > 0].order(ascending=False)

    alts = alternatives.ix[empty_units.index]
    alts["supply"] = empty_units
    pdf = pd.DataFrame(index=alts.index)

    segments = movers.groupby(agents_groupby)

    for name, segment in segments:
        # One representative agent per model segment.
        segment = segment.head(1)
        name = str(name)
        tmp_coeffname = coeff_name % name
        ind_vars = dset.coeffs[(tmp_coeffname, 'fnames')][np.invert(
            dset.coeffs[(tmp_coeffname,
                         'fnames')].isnull().values)].values.tolist()
        SAMPLE_SIZE = alts.index.size
        alts_sample = alts
        alts_sample['join_index'] = np.repeat(segment.index.values,
                                              SAMPLE_SIZE)
        alternative_sample = pd.merge(alts_sample,
                                      segment,
                                      left_on='join_index',
                                      right_index=True,
                                      suffixes=('', '_r'))
        ##Interaction variables
        # '<left>_x_<right>': 'gt'/'lt' suffix on the left name selects a 0/1
        # comparison, otherwise the product.  This was previously
        # `if gt / if lt / else`, which overwrote the 'gt' indicator with the
        # product.
        # NOTE(review): keep the estimation code consistent with this.
        interaction_vars = [(var, var.split('_x_')) for var in ind_vars
                            if '_x_' in var]
        for ivar in interaction_vars:
            if ivar[1][0].endswith('gt'):
                alternative_sample[ivar[0]] = (
                    (alternative_sample[ivar[1][0]]) >
                    alternative_sample[ivar[1][1]]).astype('int32')
            elif ivar[1][0].endswith('lt'):
                alternative_sample[ivar[0]] = (
                    (alternative_sample[ivar[1][0]]) <
                    alternative_sample[ivar[1][1]]).astype('int32')
            else:
                alternative_sample[ivar[0]] = (
                    (alternative_sample[ivar[1][0]]) *
                    alternative_sample[ivar[1][1]])

        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        est_data = est_data.fillna(0)
        data = est_data.values  # same array as_matrix() returned
        coeff = dset.load_coeff(tmp_coeffname)
        probs = interaction.mnl_simulate(data,
                                         coeff,
                                         numalts=SAMPLE_SIZE,
                                         returnprobs=1)
        pdf['segment%s' % name] = pd.Series(probs.flatten(), index=alts.index)

    # -1 = not yet placed.
    new_homes = pd.Series(np.ones(len(movers.index)) * -1, index=movers.index)
    # `mask` is threaded through choose() but never modified by it.
    mask = np.zeros(len(alts.index), dtype='bool')

    for name, segment in segments:
        name = str(name)
        print("Assigning units to %d agents of segment %s" % (len(
            segment.index), name))
        p = pdf['segment%s' % name]

        def choose(p, mask, alternatives, segment, new_homes, minsize=None):
            """Draw one building per agent in ``segment`` without replacement
            among buildings whose supply is at least ``minsize``."""

            # Only buildings with enough free space are candidates.
            choiceset = alternatives.loc[alternatives.supply >= minsize]

            p = copy.copy(p)
            p = p[alternatives.supply >= minsize]
            p_arr = p.values

            try:
                indexes = np.random.choice(choiceset.index,
                                           len(segment.index),
                                           replace=False,
                                           p=p_arr / p_arr.sum())
            except ValueError:
                # np.random.choice raises ValueError when the choice set is
                # too small (or p is unusable); leave these agents at -1.
                print("WARNING: not enough options to fit agents, will result in unplaced agents")
                return mask, new_homes
            # `indexes` holds building labels here, not positions.
            new_homes.ix[segment.index] = indexes
            alternatives.loc[indexes, "supply"] -= minsize

            return mask, new_homes

        tmp = segment['employees']
        # groupby yields keys in ascending order; reversed() places the
        # largest establishments first.
        for n_employees, subsegment in reversed(
                list(segment.groupby(tmp.astype('int')))):
            mask, new_homes = choose(p,
                                     mask,
                                     alts,
                                     subsegment,
                                     new_homes,
                                     minsize=int(n_employees))

    build_cnts = new_homes.value_counts()  #num estabs place in each building
    print("Assigned %d agents to %d locations with %d unplaced" % (
        new_homes.size, build_cnts.size, build_cnts.get(-1, 0)))
    new_homes_frame = pd.DataFrame(new_homes, columns=['building_id'])
    # Left merge keeps unplaced agents (building_id -1), with a missing
    # zone_id wherever -1 has no match in dset.buildings.
    result_set = pd.merge(new_homes_frame,
                          dset.buildings,
                          left_on='building_id',
                          right_index=True,
                          how='left')[['building_id', 'zone_id']]
    result_set["employees"] = pd.merge(result_set,
                                       dset.establishments,
                                       left_index=True,
                                       right_index=True)['employees'].values
    table = dset.establishments  # need to go back to the whole dataset
    # Reset to -1 first, then write the new placements.
    table.loc[result_set.index, "building_id"] = -1
    table.loc[result_set.index, "zone_id"] = -1
    table.loc[result_set.index, "building_id"] = result_set.building_id.values
    table.loc[result_set.index, "zone_id"] = result_set.zone_id
    dset.store_attr(output_varname, year, copy.deepcopy(table[depvar]))
def simulate(dset, year, depvar='building_id', alternatives=None, simulation_table=None,
             output_names=None, agents_groupby=['income_3_tenure'], transition_config=None,
             relocation_config=None):
    """Place unplaced households into vacant units, one (county, income group) at a time.

    Steps, in order:
      1. Optionally run the household transition model and store the new
         households/persons tables on ``dset.d``.
      2. Add an ``est_mortgage_payment`` column to ``alternatives``.
      3. Optionally mark relocating households as unplaced (``depvar == -1``).
      4. Draw a county for every unplaced household from the row of
         ``county_growth_share.csv`` for ``year``.
      5. For every (county, income group) pair: build the set of vacant units
         in that county, filter it by price/affordability, compute MNL choice
         probabilities per agent segment and draw a unit for each household.
      6. Write the placed households back to ``dset.households``.

    Args:
        dset: project dataset object. Uses ``fetch``, ``d``, ``households``,
            ``store``, ``relocation_rates``, ``coeffs`` and ``load_coeff``.
        year: simulation year; passed to the transition model and used as the
            row label in ``county_growth_share.csv``.
        depvar: column holding the chosen building id; ``-1`` means unplaced.
        alternatives: buildings DataFrame. Mutated in place (the
            ``est_mortgage_payment`` column is added). Reads
            ``unit_price_residential``, ``residential_units`` and ``county_id``.
        simulation_table: table name passed to ``dset.fetch``.
        output_names: 4-tuple ``(output_csv, output_title, coeff_name,
            output_varname)``. Only ``coeff_name`` is used here; it is
            ``%``-formatted with the segment name.
        agents_groupby: columns used to split households into segments.
            The default list is never mutated.
        transition_config: dict with ``'Enabled'``, ``'control_totals_table'``
            and ``'scaling_factor'``.
        relocation_config: dict with ``'Enabled'``, ``'relocation_rates_table'``
            and ``'scaling_factor'``.

    Returns:
        None. Results are written to ``dset``.
    """
    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        ct["total_number_of_households"] = (ct["total_number_of_households"]*transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        new, added, new_linked = model.transition(
                hh, year, linked_tables={'linked': (persons, 'household_id')})
        # Households added by the transition start out unplaced.
        new.loc[added,'building_id'] = -1
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']

    buildings = alternatives
    out_table = pd.DataFrame(columns=dset.households.columns)
    homeless = pd.DataFrame(columns=dset.households.columns)
    # Level monthly payment on the unit price for n = 360 payments at a
    # monthly rate r = 5% / 12 (standard amortization formula).
    r = .05/12
    n = 360
    buildings['est_mortgage_payment'] = buildings.unit_price_residential*((r*(1+r)**n)/((1+r)**n-1))

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    if relocation_config['Enabled']:
        rate_table = dset.store[relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        # Rates are stored as percentages; convert to probabilities and scale.
        rate_table[rate_field] = rate_table[rate_field]*.01*relocation_config['scaling_factor']
        movers = dset.relocation_rates(choosers,rate_table,rate_field)
        choosers[depvar].ix[movers] = -1

    movers_all = choosers[choosers[depvar]==-1]
    county_growth_share = pd.read_csv(os.path.join(misc.data_dir(),'county_growth_share.csv'), index_col=0 )
    counties = county_growth_share.columns.values
    current_growth_shares = county_growth_share.loc[year].values
    movers_counties = np.random.choice(counties, movers_all.shape[0], replace=True, p=current_growth_shares)
    movers_all['county_id'] = movers_counties

    # One row per income group; columns are (upper, mean), (upper, size),
    # (lower, mean), (lower, size) -- accessed positionally below.
    income_segment = movers_all.groupby('income_grp')['upper_income_grp_val','lower_income_grp_val'].agg([np.mean, np.size])

    # Replicate the income-group rows once per county in the growth-share row.
    income_segment['county'] = county_growth_share.loc[year].index.values[0]
    income_segment['growth_share'] = county_growth_share.loc[year][0]
    copy_df = income_segment.copy()
    for i in county_growth_share.loc[year][1:].iteritems():
        copy_df['county'] = i[0]
        copy_df['growth_share'] = i[1]
        income_segment = pd.concat([income_segment, copy_df])

    income_segment = income_segment.set_index(['county', income_segment.index])

    print("Total new agents and movers = %d" % len(movers_all.index))

    def choose(p,mask,alternatives,segment,new_homes,minsize=None):
        """Draw one alternative per agent in ``segment`` and record it in ``new_homes``."""
        p = copy.copy(p)
        p.loc[mask[mask==True].index] = 0 # already chosen
        try:
            indexes = np.random.choice(alternatives.index.values,len(segment.index),replace=False,p=p.values/p.values.sum())
        except ValueError:
            # np.random.choice raises ValueError when the sample is larger
            # than the population or p is not a valid probability vector.
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            n_alts = len(alternatives.index.values)
            indexes = np.random.choice(alternatives.index.values,n_alts,replace=False,p=p.values/p.values.sum())

            tenure2 = segment[segment.tenure==2].index.values[:n_alts]
            if(new_homes.ix[tenure2].shape[0] != 0):
                # Agents with tenure == 2 are flagged with -2 instead of a unit.
                new_homes.ix[tenure2] = -2
            else:
                new_homes.ix[segment.index.values[:n_alts]] = alternatives.index.values

            mask.loc[indexes] = True
            return mask,new_homes

        new_homes.ix[segment.index] = alternatives.loc[indexes].index.values[:len(new_homes.ix[segment.index])]
        mask.loc[indexes] = True

        return mask,new_homes

    for seg_key, seg_row in income_segment.iterrows():
        county = seg_key[0]
        upper_income = seg_row[0]
        n_households = seg_row[1]
        lower_income = seg_row[2]

        # NOTE(review): movers are selected by income range only; the
        # 'county_id' drawn above is never used to filter them, so the same
        # households are re-placed for every county -- confirm this is intended.
        movers = movers_all[(movers_all['income']<= upper_income) & (movers_all['income']>= lower_income)]
        print('County: %s. Placing %d households in the income range (%d, %d)' % (county,n_households,lower_income, upper_income))

        # Vacant units = residential units minus currently placed households;
        # each building is repeated once per vacant unit.
        empty_units = buildings.residential_units.sub(choosers[choosers['building_id']!=-1].groupby('building_id').size(),fill_value=0)
        empty_units = empty_units[empty_units>0].order(ascending=False)
        print('number of empty units is %d' % empty_units.sum())
        vacant = buildings.ix[np.repeat(empty_units.index.values,empty_units.values.astype('int'))]
        vacant = vacant[vacant.county_id == int(county)]

        if((lower_income/12) <= 0):
            alts = vacant[vacant['unit_price_residential'] < 186281]
        elif((lower_income/12) >= 55000):
            alts = vacant[vacant['unit_price_residential'] > 1583400]
        else:
            # Payment may not exceed 33% of monthly income.
            alts = vacant[vacant['est_mortgage_payment'] / (lower_income/12) <= 0.33]
        if(alts.shape[0] == 0):
            # Record the households of this segment (the original concatenated
            # the whole choosers table here).
            homeless = pd.concat([movers, homeless])
            print('Could not place %d households due to income restrictions' % n_households)
            continue

        pdf = pd.DataFrame(index=alts.index)

        segments = movers.groupby(agents_groupby)

        ##simulation
        for name, segment in segments:
            # Probabilities are computed for the first agent of each segment.
            segment = segment.head(1)
            name = str(name)
            tmp_coeffname = coeff_name%name
            fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
            ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()
            SAMPLE_SIZE = alts.index.size
            alts_sample = alts
            alts_sample['join_index'] = np.repeat(segment.index.values,SAMPLE_SIZE)
            alternative_sample = pd.merge(alts_sample,segment,left_on='join_index',right_index=True,suffixes=('','_r'))

            ##Interaction variables
            interaction_vars = [(var, var.split('_x_')) for var in ind_vars if '_x_' in var]
            for ivar_name, parts in interaction_vars:
                lhs = alternative_sample[parts[0]]
                rhs = alternative_sample[parts[1]]
                if parts[0].endswith('gt'):
                    alternative_sample[ivar_name] = (lhs>rhs).astype('int32')
                # elif (was a second `if`): otherwise the product in the else
                # branch overwrote every 'gt' comparison.
                elif parts[0].endswith('lt'):
                    alternative_sample[ivar_name] = (lhs<rhs).astype('int32')
                else:
                    alternative_sample[ivar_name] = lhs*rhs

            est_data = pd.DataFrame(index=alternative_sample.index)
            for varname in ind_vars:
                est_data[varname] = alternative_sample[varname]
            est_data = est_data.fillna(0)
            data = est_data.as_matrix()
            coeff = dset.load_coeff(tmp_coeffname)
            probs = interaction.mnl_simulate(data,coeff,numalts=SAMPLE_SIZE,returnprobs=1)
            pdf['segment%s'%name] = pd.Series(probs.flatten(),index=alts.index)

        new_homes = pd.Series(np.ones(len(movers.index))*-1,index=movers.index)
        for name, segment in segments:
            name = str(name)
            p = pdf['segment%s'%name]
            # NOTE(review): the mask is re-created for every segment, so units
            # taken by one segment are not excluded for the next -- confirm.
            mask = pd.Series(np.zeros(len(alts.index),dtype='bool'), index=alts.index)

            print("Assigning units to %d agents of segment %s" % (len(segment.index),name))
            mask,new_homes = choose(p,mask,alts,segment,new_homes)

        build_cnts = new_homes.value_counts()  #num households place in each building
        print("Assigned %d agents to %d locations with %d unplaced" % (new_homes.size,build_cnts.size,build_cnts.get(-1,0)))

        table = dset.households # need to go back to the whole dataset
        table[depvar].ix[new_homes.index] = new_homes.values.astype('int32')
        table = table.ix[new_homes.index]
        out_table = pd.concat([table, out_table])
        # Keep choosers current so the next vacancy count sees these placements.
        choosers.loc[table.index] = table

    dset.households.loc[out_table.index] = out_table
def simulate(dset, year, depvar='building_id', alternatives=None, simulation_table=None,
             output_names=None, agents_groupby=['income_3_tenure'], transition_config=None,
             relocation_config=None):
    """Place households into vacant units, one distinct income value at a time.

    For every distinct ``income`` in ``dset.households`` the matching rows of
    ``simulation_table`` are given a unit drawn from the vacant, affordable
    alternatives using MNL probabilities computed per agent segment.

    Args:
        dset: project dataset object. Uses ``households``, ``coeffs`` and
            ``load_coeff``.
        year: unused in this variant.
        depvar: column of ``simulation_table`` receiving the chosen building id.
        alternatives: buildings DataFrame. Mutated in place (the
            ``est_mortgage_payment`` column is added). Reads
            ``unit_price_residential`` and ``residential_units``.
        simulation_table: households DataFrame (not a table name). Mutated in
            place with the new ``depvar`` values.
        output_names: 4-tuple ``(output_csv, output_title, coeff_name,
            output_varname)``. Only ``coeff_name`` is used; it is
            ``%``-formatted with the segment name.
        agents_groupby: columns used to split households into segments.
            The default list is never mutated.
        transition_config: unused in this variant.
        relocation_config: unused in this variant.

    Returns:
        None. Placed households are written to a CSV file.
    """
    import synthicity.urbansim.interaction as interaction
    import pandas as pd
    import numpy as np
    import copy

    from synthicity.utils import misc
    from drcog.models import transition

    output_csv, output_title, coeff_name, output_varname = output_names
    buildings = alternatives
    income_segment = dset.households.groupby('income').size()
    out_table = pd.DataFrame(columns=dset.households.columns)
    homeless = pd.DataFrame(columns=dset.households.columns)
    # Level monthly payment on the unit price for n = 360 payments at a
    # monthly rate r = 5% / 12 (standard amortization formula).
    r = .05/12
    n = 360
    buildings['est_mortgage_payment'] = buildings.unit_price_residential*((r*(1+r)**n)/((1+r)**n-1))

    def choose(p,mask,alternatives,segment,new_homes,minsize=None):
        """Draw one alternative per agent in ``segment`` and record it in ``new_homes``."""
        p = copy.copy(p)
        p.loc[mask[mask==True].index] = 0 # already chosen
        try:
            indexes = np.random.choice(alternatives.index.values,len(segment.index),replace=False,p=p.values/p.values.sum())
        except ValueError:
            # np.random.choice raises ValueError when the sample is larger
            # than the population or p is not a valid probability vector.
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            n_alts = len(alternatives.index.values)
            indexes = np.random.choice(alternatives.index.values,n_alts,replace=False,p=p.values/p.values.sum())

            tenure2 = segment[segment.tenure==2].index.values[:n_alts]
            if(new_homes.ix[tenure2].shape[0] != 0):
                # Agents with tenure == 2 are flagged with -2 instead of a unit.
                new_homes.ix[tenure2] = -2
            else:
                new_homes.ix[segment.index.values[:n_alts]] = alternatives.index.values

            mask.loc[indexes] = True
            return mask,new_homes

        new_homes.ix[segment.index] = alternatives.loc[indexes].index.values[:len(new_homes.ix[segment.index])]
        mask.loc[indexes] = True

        return mask,new_homes

    for income, n_households in income_segment.iteritems():

        choosers = simulation_table[simulation_table['income']== income]
        print('Placing %d households with an income of % d' % (n_households,income))

        # Vacant units = residential units minus currently placed households;
        # each building is repeated once per vacant unit.
        empty_units = buildings.residential_units.sub(simulation_table[simulation_table['building_id']!=-1].groupby('building_id').size(),fill_value=0)
        empty_units = empty_units[empty_units>0].order(ascending=False)
        print('number of empty units is %d' % empty_units.sum())
        vacant = buildings.ix[np.repeat(empty_units.index,empty_units.values.astype('int'))]

        if((income/12) <= 0):
            alts = vacant[vacant['unit_price_residential'] < 186281]
        elif((income/12) >= 55000):
            alts = vacant[vacant['unit_price_residential'] > 1583400]
        else:
            # Payment may not exceed 33% of monthly income.
            alts = vacant[vacant['est_mortgage_payment'] / (income/12) <= 0.33]
        if(alts.shape[0] == 0):
            homeless = pd.concat([choosers, homeless])
            print('Could not place %d households due to income restrictions' % n_households)
            continue

        pdf = pd.DataFrame(index=alts.index)

        segments = choosers.groupby(agents_groupby)

        ##simulation
        for name, segment in segments:
            # Probabilities are computed for the first agent of each segment.
            segment = segment.head(1)
            name = str(name)
            tmp_coeffname = coeff_name%name
            fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
            ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()
            SAMPLE_SIZE = alts.index.size
            alts_sample = alts
            alts_sample['join_index'] = np.repeat(segment.index,SAMPLE_SIZE)
            alternative_sample = pd.merge(alts_sample,segment,left_on='join_index',right_index=True,suffixes=('','_r'))

            ##Interaction variables
            interaction_vars = [(var, var.split('_x_')) for var in ind_vars if '_x_' in var]
            for ivar_name, parts in interaction_vars:
                lhs = alternative_sample[parts[0]]
                rhs = alternative_sample[parts[1]]
                if parts[0].endswith('gt'):
                    alternative_sample[ivar_name] = (lhs>rhs).astype('int32')
                # elif (was a second `if`): otherwise the product in the else
                # branch overwrote every 'gt' comparison.
                elif parts[0].endswith('lt'):
                    alternative_sample[ivar_name] = (lhs<rhs).astype('int32')
                else:
                    alternative_sample[ivar_name] = lhs*rhs

            est_data = pd.DataFrame(index=alternative_sample.index)
            for varname in ind_vars:
                est_data[varname] = alternative_sample[varname]
            est_data = est_data.fillna(0)
            data = est_data.as_matrix()
            coeff = dset.load_coeff(tmp_coeffname)
            probs = interaction.mnl_simulate(data,coeff,numalts=SAMPLE_SIZE,returnprobs=1)
            pdf['segment%s'%name] = pd.Series(probs.flatten(),index=alts.index)

        new_homes = pd.Series(np.ones(len(choosers.index))*-1,index=choosers.index)
        for name, segment in segments:
            name = str(name)
            p = pdf['segment%s'%name]
            # NOTE(review): the mask is re-created for every segment, so units
            # taken by one segment are not excluded for the next -- confirm.
            mask = pd.Series(np.zeros(len(alts.index),dtype='bool'), index=alts.index)

            print("Assigning units to %d agents of segment %s" % (len(segment.index),name))
            mask,new_homes = choose(p,mask,alts,segment,new_homes)

        build_cnts = new_homes.value_counts()  #num households place in each building
        print("Assigned %d agents to %d locations with %d unplaced" % (new_homes.size,build_cnts.size,build_cnts.get(-1,0)))

        table = simulation_table # need to go back to the whole dataset
        table[depvar].ix[new_homes.index] = new_homes.values.astype('int32')
        table = table.ix[new_homes.index]
        out_table = pd.concat([table, out_table])
        simulation_table.loc[table.index] = table

    # NOTE(review): hard-coded, machine-specific output path; this call fails
    # wherever the directory does not exist. Consider making it configurable.
    out_table.to_csv('C:/users/jmartinez/documents/households_new_location.csv')
Exemple #8
0
def simulate(dset,
             year,
             depvar='building_id',
             alternatives=None,
             simulation_table='households',
             output_names=None,
             agents_groupby=[
                 'income_3_tenure',
             ],
             transition_config=None,
             relocation_config=None):
    """Place unplaced households into vacant residential units.

    Steps, in order:
      1. Optionally run the household transition model and store the new
         households/persons tables on ``dset.d``.
      2. Optionally mark relocating households as unplaced (``depvar == -1``).
      3. Build one alternative row per vacant unit.
      4. Compute MNL choice probabilities per agent segment.
      5. Draw a unit for every unplaced household, without reusing a unit.
      6. Write the choices to ``dset.households`` and call ``dset.store_attr``.

    Args:
        dset: project dataset object. Uses ``fetch``, ``d``, ``households``,
            ``buildings``, ``store``, ``relocation_rates``, ``coeffs``,
            ``load_coeff`` and ``store_attr``.
        year: simulation year; passed to the transition model and to
            ``dset.store_attr``.
        depvar: column holding the chosen building id; ``-1`` means unplaced.
        alternatives: DataFrame of candidate buildings, indexed like
            ``dset.buildings``.
        simulation_table: table name passed to ``dset.fetch``.
        output_names: 4-tuple ``(output_csv, output_title, coeff_name,
            output_varname)``. ``coeff_name`` is ``%``-formatted with the
            segment name; ``output_varname`` is passed to ``dset.store_attr``.
        agents_groupby: columns used to split households into segments.
            The default list is never mutated.
        transition_config: dict with ``'Enabled'``, ``'control_totals_table'``
            and ``'scaling_factor'``.
        relocation_config: dict with ``'Enabled'``, ``'relocation_rates_table'``
            and ``'scaling_factor'``.

    Returns:
        None. Results are written to ``dset``.
    """
    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        ct["total_number_of_households"] = (
            ct["total_number_of_households"] *
            transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(
            ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        new, added, new_linked = model.transition(
            hh, year, linked_tables={'linked': (persons, 'household_id')})
        # Households added by the transition start out unplaced.
        new.loc[added, 'building_id'] = -1
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    if relocation_config['Enabled']:
        rate_table = dset.store[
            relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        # Rates are stored as percentages; convert to probabilities and scale.
        rate_table[rate_field] = rate_table[
            rate_field] * .01 * relocation_config['scaling_factor']
        movers = dset.relocation_rates(choosers, rate_table, rate_field)
        choosers[depvar].ix[movers] = -1

    movers = choosers[choosers[depvar] == -1]
    print("Total new agents and movers = %d" % len(movers.index))

    # Vacant units = residential units minus households per building; each
    # building is repeated once per vacant unit.
    residential = dset.buildings[(dset.buildings.residential_units > 0)]
    empty_units = residential.residential_units.sub(
        choosers.groupby('building_id').size(), fill_value=0)
    empty_units = empty_units[empty_units > 0].order(ascending=False)
    alts = alternatives.ix[np.repeat(empty_units.index.values,
                                     empty_units.values.astype('int'))]
    pdf = pd.DataFrame(index=alts.index)

    segments = movers.groupby(agents_groupby)

    for name, segment in segments:
        # Probabilities are computed for the first agent of each segment.
        segment = segment.head(1)
        name = str(name)
        tmp_coeffname = coeff_name % name
        fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
        ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()
        SAMPLE_SIZE = alts.index.size
        alts_sample = alts
        alts_sample.loc[:, 'join_index'] = np.repeat(
            segment.index.values, SAMPLE_SIZE)  # corrected chained index error
        alternative_sample = pd.merge(alts_sample,
                                      segment,
                                      left_on='join_index',
                                      right_index=True,
                                      suffixes=('', '_r'))

        ##Interaction variables
        interaction_vars = [(var, var.split('_x_')) for var in ind_vars
                            if '_x_' in var]
        for ivar_name, parts in interaction_vars:
            lhs = alternative_sample[parts[0]]
            rhs = alternative_sample[parts[1]]
            if parts[0].endswith('gt'):
                alternative_sample[ivar_name] = (lhs > rhs).astype('int32')
            # elif (was a second `if`): otherwise the product in the else
            # branch overwrote every 'gt' comparison.
            elif parts[0].endswith('lt'):
                alternative_sample[ivar_name] = (lhs < rhs).astype('int32')
            else:
                alternative_sample[ivar_name] = lhs * rhs

        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        est_data = est_data.fillna(0)
        data = est_data.as_matrix()
        coeff = dset.load_coeff(tmp_coeffname)
        probs = interaction.mnl_simulate(data,
                                         coeff,
                                         numalts=SAMPLE_SIZE,
                                         returnprobs=1)
        pdf['segment%s' % name] = pd.Series(probs.flatten(), index=alts.index)

    def choose(p, mask, alternatives, segment, new_homes, minsize=None):
        """Draw one unused alternative per agent in ``segment``."""
        p = copy.copy(p)
        p[mask] = 0  # already chosen
        try:
            indexes = np.random.choice(len(alternatives.index),
                                       len(segment.index),
                                       replace=False,
                                       p=p / p.sum())
        except ValueError:
            # np.random.choice raises ValueError when fewer units with
            # non-zero probability remain than there are agents.
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            return mask, new_homes
        new_homes.ix[segment.index] = alternatives.index.values[indexes]
        mask[indexes] = 1

        return mask, new_homes

    new_homes = pd.Series(np.ones(len(movers.index)) * -1, index=movers.index)
    # Created once, before the loop, so that a unit taken by one segment stays
    # excluded for later segments (it used to be reset for every segment,
    # which made the "already chosen" exclusion a no-op).
    mask = np.zeros(len(alts.index), dtype='bool')
    for name, segment in segments:
        name = str(name)
        p = pdf['segment%s' % name].values

        print("Assigning units to %d agents of segment %s" % (len(
            segment.index), name))

        mask, new_homes = choose(p, mask, alts, segment, new_homes)

    build_cnts = new_homes.value_counts(
    )  #num households place in each building
    print("Assigned %d agents to %d locations with %d unplaced" % (
        new_homes.size, build_cnts.size, build_cnts.get(-1, 0)))

    table = dset.households  # need to go back to the whole dataset
    table[depvar].ix[new_homes.index] = new_homes.values.astype('int32')
    dset.store_attr(output_varname, year, copy.deepcopy(table[depvar]))
def simulate(dset, year, depvar='building_id', alternatives=None, simulation_table='establishments',
             output_names=None, agents_groupby=['income_3_tenure'], transition_config=None):
    """Place unplaced establishments into buildings with enough job spaces.

    Steps, in order:
      1. Optionally run the jobs transition model on ``dset``.
      2. Compute each building's remaining job spaces (``supply``).
      3. Compute MNL choice probabilities per agent segment.
      4. Within each segment, place establishments from the largest
         ``employees`` value to the smallest, drawing only from buildings
         whose remaining supply covers that size and reducing the supply
         after every draw.
      5. Write ``building_id`` and ``zone_id`` to ``dset.establishments`` and
         call ``dset.store_attr``.

    Args:
        dset: project dataset object. Uses ``fetch``, ``establishments``,
            ``buildings``, ``coeffs``, ``load_coeff`` and ``store_attr``.
        year: simulation year; passed to the transition model and to
            ``dset.store_attr``.
        depvar: column holding the chosen building id; ``-1`` means unplaced.
        alternatives: buildings DataFrame. Mutated in place
            (``building_sqft_per_job`` is NaN-filled and a ``spaces`` column
            is added). Reads ``non_residential_sqft``.
        simulation_table: table name passed to ``dset.fetch``.
        output_names: 4-tuple ``(output_csv, output_title, coeff_name,
            output_varname)``. ``coeff_name`` is ``%``-formatted with the
            segment name; ``output_varname`` is passed to ``dset.store_attr``.
        agents_groupby: columns used to split establishments into segments.
            The default list is never mutated.
        transition_config: dict with ``'Enabled'``, ``'control_totals_table'``
            and ``'scaling_factor'``.

    Returns:
        None. Results are written to ``dset``.
    """
    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        ct["total_number_of_jobs"] = (ct["total_number_of_jobs"]*transition_config['scaling_factor']).astype('int32')
        new_jobs = {"table": "dset.establishments","writetotmp": "establishments","model": "transitionmodel","first_year": 2010,"control_totals": "dset.%s"%transition_config['control_totals_table'],
                    "geography_field": "building_id","amount_field": "total_number_of_jobs","size_field":"employees"}
        import synthicity.urbansim.transitionmodel as transitionmodel
        transitionmodel.simulate(dset,new_jobs,year=year,show=True)

    dset.establishments.index.name = 'establishment_id'
    choosers = dset.fetch(simulation_table)
    placed_choosers = choosers[choosers[depvar]>0]

    movers = choosers[choosers[depvar]==-1]
    movers.loc[:, "zone_id"] = -1
    print("Total new agents and movers = %d" % len(movers.index))

    # Job spaces per building = non-residential sqft / sqft per job, with a
    # fallback of 1000 sqft per job where the value is missing.
    alternatives.building_sqft_per_job = alternatives.building_sqft_per_job.fillna(1000)
    alternatives.loc[:, 'spaces'] = alternatives.non_residential_sqft/alternatives.building_sqft_per_job  # corrected chained indexing error
    # Remaining supply = spaces minus employees of already placed establishments.
    empty_units = alternatives.spaces.sub(placed_choosers.groupby('building_id').employees.sum(),fill_value=0).astype('int')
    empty_units = empty_units[empty_units>0].order(ascending=False)

    alts = alternatives.ix[empty_units.index]
    alts["supply"] = empty_units
    pdf = pd.DataFrame(index=alts.index)

    segments = movers.groupby(agents_groupby)

    for name, segment in segments:
        # Probabilities are computed for the first agent of each segment.
        segment = segment.head(1)
        name = str(name)
        tmp_coeffname = coeff_name%name
        fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
        ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()
        SAMPLE_SIZE = alts.index.size
        alts_sample = alts
        alts_sample['join_index'] = np.repeat(segment.index.values,SAMPLE_SIZE)
        alternative_sample = pd.merge(alts_sample,segment,left_on='join_index',right_index=True,suffixes=('','_r'))

        ##Interaction variables
        interaction_vars = [(var, var.split('_x_')) for var in ind_vars if '_x_' in var]
        for ivar_name, parts in interaction_vars:
            lhs = alternative_sample[parts[0]]
            rhs = alternative_sample[parts[1]]
            if parts[0].endswith('gt'):
                alternative_sample[ivar_name] = (lhs>rhs).astype('int32')
            # elif (was a second `if`): otherwise the product in the else
            # branch overwrote every 'gt' comparison.
            elif parts[0].endswith('lt'):
                alternative_sample[ivar_name] = (lhs<rhs).astype('int32')
            else:
                alternative_sample[ivar_name] = lhs*rhs

        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        est_data = est_data.fillna(0)
        data = est_data.as_matrix()
        coeff = dset.load_coeff(tmp_coeffname)
        probs = interaction.mnl_simulate(data,coeff,numalts=SAMPLE_SIZE,returnprobs=1)
        pdf['segment%s'%name] = pd.Series(probs.flatten(),index=alts.index)

    def choose(p,mask,alternatives,segment,new_homes,minsize=None):
        """Draw a building with ``supply >= minsize`` for each agent in ``segment``."""
        has_room = alternatives.supply >= minsize
        choiceset = alternatives.loc[has_room]
        p_arr = p[has_room].values

        try:
            indexes = np.random.choice(choiceset.index, len(segment.index), replace=False, p=p_arr/p_arr.sum())
        except ValueError:
            # np.random.choice raises ValueError when the choice set is empty
            # or smaller than the number of agents to place.
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            return mask,new_homes
        new_homes.ix[segment.index] = indexes
        # Consume the space taken by each placed establishment.
        alternatives.loc[indexes, "supply"] -= minsize

        return mask,new_homes

    new_homes = pd.Series(np.ones(len(movers.index))*-1,index=movers.index)
    # Passed through choose() unchanged; supply is what limits reuse here.
    mask = np.zeros(len(alts.index),dtype='bool')

    for name, segment in segments:
        name = str(name)
        print("Assigning units to %d agents of segment %s" % (len(segment.index),name))
        p = pdf['segment%s'%name]
        # Largest establishments first, so they are placed while big enough
        # buildings are still available.
        by_size = list(segment.groupby(segment['employees'].astype('int')))
        for size, subsegment in reversed(by_size):
            mask,new_homes = choose(p,mask,alts,subsegment,new_homes,minsize=int(size))

    build_cnts = new_homes.value_counts()  #num estabs place in each building
    print("Assigned %d agents to %d locations with %d unplaced" % (new_homes.size,build_cnts.size,build_cnts.get(-1,0)))
    new_homes_frame = pd.DataFrame(new_homes, columns=['building_id'])
    # Look up the zone of every chosen building.
    result_set = pd.merge(new_homes_frame, dset.buildings, left_on='building_id', right_index=True, how='left')[['building_id','zone_id']]
    result_set["employees"] = pd.merge(result_set, dset.establishments, left_index=True, right_index=True)['employees'].values
    table = dset.establishments # need to go back to the whole dataset
    table.loc[result_set.index, "building_id"] = -1
    table.loc[result_set.index, "zone_id"] = -1
    table.loc[result_set.index,"building_id"] = result_set.building_id.values
    table.loc[result_set.index,"zone_id"] = result_set.zone_id
    dset.store_attr(output_varname,year,copy.deepcopy(table[depvar]))
def simulate(dset, year, depvar='building_id', alternatives=None, simulation_table='households',
             output_names=None, agents_groupby=['income_3_tenure',], transition_config=None,
             relocation_config=None):
    """Assign households that need a home to vacant residential units.

    The function works in place on ``dset`` and returns nothing.  In order:

    1. When ``transition_config`` is enabled, the household control totals
       are scaled, the transition model is run on the households table
       (with persons as a linked table), added households get
       ``building_id = -1`` and both tables are stored back on ``dset.d``.
    2. When ``relocation_config`` is enabled, ``dset.relocation_rates``
       selects movers and their ``depvar`` is reset to ``-1``.
    3. Every agent whose ``depvar`` is ``-1`` is a mover.  One alternative
       row is built per vacant unit (building units minus current
       occupants, repeated per building).
    4. Movers are grouped by ``agents_groupby``.  For each group the first
       agent is used as the representative chooser and multinomial-logit
       probabilities over all vacant units are computed from the
       coefficients named ``coeff_name % group_name``.
    5. Each group draws as many distinct units as it has agents, weighted
       by those probabilities.  Units taken by an earlier group are
       excluded for later groups.  A group that cannot be fitted is left
       unplaced (``-1``) and a warning is printed.
    6. The chosen ids are written to ``dset.households[depvar]`` and a copy
       of that column is passed to ``dset.store_attr``.

    Parameters
    ----------
    dset : project dataset object
        Must provide ``fetch``, ``d``, ``households``, ``buildings``,
        ``store``, ``coeffs``, ``load_coeff``, ``relocation_rates`` and
        ``store_attr``.
    year : passed through to the transition model and ``store_attr``.
    depvar : str
        Column holding the chosen location; ``-1`` marks an unplaced agent.
    alternatives : pandas.DataFrame
        Candidate locations, looked up by the index labels of
        ``dset.buildings``.  Required despite the ``None`` default.
    simulation_table : str
        Table name handed to ``dset.fetch`` to obtain the choosers.
    output_names : sequence of 4 items
        ``(output_csv, output_title, coeff_name, output_varname)``.  Only
        ``coeff_name`` (a ``%``-format pattern) and ``output_varname`` are
        used here.  Required despite the ``None`` default.
    agents_groupby : list of str
        Columns defining the chooser segments (never mutated here).
    transition_config : dict or None
        Keys read: ``'Enabled'``, ``'control_totals_table'``,
        ``'scaling_factor'``.  ``None`` is treated as disabled.
    relocation_config : dict or None
        Keys read: ``'Enabled'``, ``'relocation_rates_table'``,
        ``'scaling_factor'``.  ``None`` is treated as disabled.
    """
    _, _, coeff_name, output_varname = output_names

    # ---- 1. demographic transition -------------------------------------
    if transition_config and transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        ct["total_number_of_households"] = (
            ct["total_number_of_households"] * transition_config['scaling_factor']
        ).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        new, added, new_linked = model.transition(
            hh, year, linked_tables={'linked': (persons, 'household_id')})
        new.loc[added, 'building_id'] = -1  # newly added households have no home yet
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    # ---- 2. relocation --------------------------------------------------
    if relocation_config and relocation_config['Enabled']:
        rate_table = dset.store[relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        # The stored rate is multiplied by .01 and by the configured scaling factor.
        rate_table[rate_field] = rate_table[rate_field] * .01 * relocation_config['scaling_factor']
        movers = dset.relocation_rates(choosers, rate_table, rate_field)
        # Single-step .loc assignment: the chained form
        # ``choosers[depvar].ix[movers] = -1`` can end up writing to a copy.
        choosers.loc[movers, depvar] = -1

    # ---- 3. movers and vacant units --------------------------------------
    movers = choosers[choosers[depvar] == -1]
    print("Total new agents and movers = %d" % len(movers.index))
    residential = dset.buildings[dset.buildings.residential_units > 0].residential_units
    empty_units = residential.sub(choosers.groupby('building_id').size(), fill_value=0)
    empty_units = empty_units[empty_units > 0].order(ascending=False)
    # One row per vacant unit: each building id is repeated by its vacancy count.
    alternatives = alternatives.ix[np.repeat(empty_units.index.values,
                                             empty_units.values.astype('int'))]
    alts = alternatives
    num_alts = alts.index.size
    pdf = pd.DataFrame(index=alts.index)

    segments = movers.groupby(agents_groupby)

    # ---- 4. choice probabilities per segment -----------------------------
    for name, segment in segments:
        segment = segment.head(1)  # the first agent represents the whole segment
        name = str(name)
        tmp_coeffname = coeff_name % name
        fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
        ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()

        # Attach the representative chooser's attributes to every alternative.
        alts.loc[:, 'join_index'] = np.repeat(segment.index.values, num_alts)
        alternative_sample = pd.merge(alts, segment, left_on='join_index',
                                      right_index=True, suffixes=('', '_r'))

        # Interaction variables are named "<left>_x_<right>".
        for var in ind_vars:
            if '_x_' not in var:
                continue
            parts = var.split('_x_')
            left, right = parts[0], parts[1]
            # ``elif`` is essential: with two independent ``if`` statements
            # the trailing ``else`` also ran for "gt" terms and replaced the
            # comparison with a product.
            if left.endswith('gt'):
                alternative_sample[var] = (
                    alternative_sample[left] > alternative_sample[right]).astype('int32')
            elif left.endswith('lt'):
                alternative_sample[var] = (
                    alternative_sample[left] < alternative_sample[right]).astype('int32')
            else:
                alternative_sample[var] = alternative_sample[left] * alternative_sample[right]

        est_data = pd.DataFrame(index=alternative_sample.index)
        for varname in ind_vars:
            est_data[varname] = alternative_sample[varname]
        data = est_data.fillna(0).as_matrix()
        coeff = dset.load_coeff(tmp_coeffname)
        probs = interaction.mnl_simulate(data, coeff, numalts=num_alts, returnprobs=1)
        pdf['segment%s' % name] = pd.Series(probs.flatten(), index=alts.index)

    # ---- 5. draw units ----------------------------------------------------
    def choose(p, mask, alternatives, segment, new_homes, minsize=None):
        """Draw one distinct unit per agent in ``segment``.

        ``mask`` flags (by position) units that are already taken; their
        probability is zeroed before drawing.  On success ``new_homes`` and
        ``mask`` are updated; when the draw is impossible both are returned
        unchanged.  ``minsize`` is accepted but not used.
        """
        p = copy.copy(p)
        p[mask] = 0  # already chosen
        try:
            indexes = np.random.choice(len(alternatives.index), len(segment.index),
                                       replace=False, p=p / p.sum())
        except ValueError:
            # numpy raises ValueError when fewer selectable units remain
            # than agents to place; other errors are allowed to propagate.
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            return mask, new_homes
        new_homes.ix[segment.index] = alternatives.index.values[indexes]
        mask[indexes] = 1
        return mask, new_homes

    new_homes = pd.Series(np.ones(len(movers.index)) * -1, index=movers.index)
    # The mask lives outside the loop so that a unit handed to one segment
    # is not offered again to the next one.
    mask = np.zeros(len(alts.index), dtype='bool')
    for name, segment in segments:
        name = str(name)
        p = pdf['segment%s' % name].values
        print("Assigning units to %d agents of segment %s" % (len(segment.index), name))
        mask, new_homes = choose(p, mask, alts, segment, new_homes)

    build_cnts = new_homes.value_counts()  # num households placed in each building
    print("Assigned %d agents to %d locations with %d unplaced" % (
        new_homes.size, build_cnts.size, build_cnts.get(-1, 0)))

    # ---- 6. write back ----------------------------------------------------
    # NOTE(review): results always go to dset.households, even when
    # ``simulation_table`` names a different table -- confirm this is intended.
    table = dset.households  # need to go back to the whole dataset
    table.loc[new_homes.index, depvar] = new_homes.values.astype('int32')
    dset.store_attr(output_varname, year, copy.deepcopy(table[depvar]))
Exemple #11
0
def simulate(dset,
             year,
             depvar='building_id',
             alternatives=None,
             simulation_table=None,
             output_names=None,
             agents_groupby=[
                 'income_3_tenure',
             ],
             transition_config=None,
             relocation_config=None,
             households_csv_path='C:/users/jmartinez/documents/households_new_location.csv'):
    """Place households into vacant units, one income value at a time.

    For every distinct ``income`` found in ``dset.households`` the function:

    1. selects the rows of ``simulation_table`` with that income;
    2. recomputes the vacant units (building units minus the households of
       ``simulation_table`` that already have a ``building_id``);
    3. restricts the vacant units by price (see the cut-offs in the code);
       if none are left, the households are collected in a local
       ``homeless`` frame and the income value is skipped;
    4. computes multinomial-logit probabilities per ``agents_groupby``
       segment, using the first agent of each segment as representative;
    5. draws units for each segment and writes the result into
       ``simulation_table[depvar]``.

    The placed rows of all income values are concatenated and written to
    ``households_csv_path``.

    Side effects: a column ``est_mortgage_payment`` is added to the
    ``alternatives`` frame passed in, and ``simulation_table`` is modified
    in place.

    Parameters
    ----------
    dset : project dataset object
        Must provide ``households``, ``coeffs`` and ``load_coeff``.
    year, transition_config, relocation_config :
        Not used in this body.
    depvar : str
        Column receiving the chosen building id.  ``-1`` means unplaced;
        ``-2`` is written by the fallback branch of ``choose``.
    alternatives : pandas.DataFrame
        Buildings with ``residential_units`` and
        ``unit_price_residential`` columns.  Required despite the ``None``
        default.
    simulation_table : pandas.DataFrame
        Households to place, with ``income``, ``building_id`` and the
        ``agents_groupby`` columns (``tenure`` is read in the fallback
        branch).  Required despite the ``None`` default.
    output_names : sequence of 4 items
        ``(output_csv, output_title, coeff_name, output_varname)``; only
        ``coeff_name`` (a ``%``-format pattern) is used.
    agents_groupby : list of str
        Columns defining the chooser segments (never mutated here).
    households_csv_path : str
        Destination of the CSV with the placed households.  Defaults to the
        path that used to be hard-coded.
    """
    import synthicity.urbansim.interaction as interaction
    import pandas as pd, numpy as np, copy

    from synthicity.utils import misc
    from drcog.models import transition

    # NOTE(review): nothing increments temp_count, so the ``> 50`` guard at
    # the bottom of the loop can never fire.
    temp_count = 0
    output_csv, output_title, coeff_name, output_varname = output_names
    buildings = alternatives
    income_segment = dset.households.groupby('income').size()
    out_table = pd.DataFrame(columns=dset.households.columns)
    homeless = pd.DataFrame(columns=dset.households.columns)

    # Level payment of an amortising loan: monthly rate .05/12, 360 payments.
    r = .05 / 12
    n = 360
    buildings['est_mortgage_payment'] = buildings.unit_price_residential * (
        (r * (1 + r)**n) / ((1 + r)**n - 1))

    def choose(p, mask, alternatives, segment, new_homes, minsize=None):
        """Draw building ids for the agents of ``segment``.

        ``p`` and ``mask`` are Series indexed like ``alternatives``.
        Entries flagged in ``mask`` get probability zero.  When numpy
        cannot draw one distinct alternative per agent, the fallback branch
        runs instead (see inline comments).  ``minsize`` is accepted but
        not used.  Returns the updated ``(mask, new_homes)``.
        """
        p = copy.copy(p)
        p.loc[mask[mask == True].index] = 0  # already chosen
        alt_ids = alternatives.index.values
        try:
            indexes = np.random.choice(alt_ids,
                                       len(segment.index),
                                       replace=False,
                                       p=p.values / p.values.sum())
        except ValueError:
            # numpy raises ValueError when the draw is impossible; other
            # errors are allowed to propagate.
            print("WARNING: not enough options to fit agents, will result in unplaced agents")
            indexes = np.random.choice(alt_ids,
                                       len(alt_ids),
                                       replace=False,
                                       p=p.values / p.values.sum())

            # Fallback: if the segment has agents with tenure == 2, the
            # first len(alt_ids) of them are marked -2; otherwise the first
            # len(alt_ids) agents receive the alternatives in index order.
            tenure2_ids = segment[segment.tenure == 2].index.values[:len(alt_ids)]
            if new_homes.ix[tenure2_ids].shape[0] != 0:
                new_homes.ix[tenure2_ids] = -2
            else:
                new_homes.ix[segment.index.values[:len(alt_ids)]] = alt_ids

            mask.loc[indexes] = True
            return mask, new_homes

        new_homes.ix[segment.index] = alternatives.loc[
            indexes].index.values[:len(new_homes.ix[segment.index])]
        mask.loc[indexes] = True

        return mask, new_homes

    for seg in income_segment.iteritems():
        # seg is (income value, number of households with that income)
        choosers = simulation_table[simulation_table['income'] == seg[0]]
        print('Placing %d households with an income of % d' % (seg[1], seg[0]))

        empty_units = buildings.residential_units.sub(
            simulation_table[simulation_table['building_id'] != -1].groupby(
                'building_id').size(),
            fill_value=0)
        empty_units = empty_units[empty_units > 0].order(ascending=False)
        print('number of empty units is %d' % empty_units.sum())
        # One row per vacant unit: each building id is repeated by its vacancy count.
        alternatives = buildings.ix[np.repeat(
            empty_units.index, empty_units.values.astype('int'))]

        # Hard-coded price cut-offs; the first branch also keeps the ratio
        # in the last branch from dividing by a non-positive monthly income.
        # NOTE(review): the meaning of 186281 / 55000 / 1583400 is not
        # documented in this file -- confirm with the model owner.
        monthly_income = seg[0] / 12
        if monthly_income <= 0:
            alts = alternatives[
                alternatives['unit_price_residential'] < 186281]
        elif monthly_income >= 55000:
            alts = alternatives[
                alternatives['unit_price_residential'] > 1583400]
        else:
            alts = alternatives[alternatives['est_mortgage_payment'] /
                                monthly_income <= 0.33]
        if alts.shape[0] == 0:
            homeless = pd.concat([choosers, homeless])
            print('Could not place %d households due to income restrictions' % seg[1])
            continue

        pdf = pd.DataFrame(index=alts.index)

        segments = choosers.groupby(agents_groupby)

        ##simulation
        SAMPLE_SIZE = alts.index.size
        for name, segment in segments:
            segment = segment.head(1)  # the first agent represents the whole segment
            name = str(name)
            tmp_coeffname = coeff_name % name
            fnames = dset.coeffs[(tmp_coeffname, 'fnames')]
            ind_vars = fnames[np.invert(fnames.isnull().values)].values.tolist()

            # Attach the representative chooser's attributes to every alternative.
            alts['join_index'] = np.repeat(segment.index, SAMPLE_SIZE)
            alternative_sample = pd.merge(alts,
                                          segment,
                                          left_on='join_index',
                                          right_index=True,
                                          suffixes=('', '_r'))

            ##Interaction variables, named "<left>_x_<right>"
            for var in ind_vars:
                if '_x_' not in var:
                    continue
                parts = var.split('_x_')
                left, right = parts[0], parts[1]
                # ``elif`` is essential: with two independent ``if``
                # statements the trailing ``else`` also ran for "gt" terms
                # and replaced the comparison with a product.
                if left.endswith('gt'):
                    alternative_sample[var] = (
                        alternative_sample[left] >
                        alternative_sample[right]).astype('int32')
                elif left.endswith('lt'):
                    alternative_sample[var] = (
                        alternative_sample[left] <
                        alternative_sample[right]).astype('int32')
                else:
                    alternative_sample[var] = (
                        alternative_sample[left] *
                        alternative_sample[right])

            est_data = pd.DataFrame(index=alternative_sample.index)
            for varname in ind_vars:
                est_data[varname] = alternative_sample[varname]
            data = est_data.fillna(0).as_matrix()
            coeff = dset.load_coeff(tmp_coeffname)
            probs = interaction.mnl_simulate(data,
                                             coeff,
                                             numalts=SAMPLE_SIZE,
                                             returnprobs=1)
            pdf['segment%s' % name] = pd.Series(probs.flatten(),
                                                index=alts.index)

        new_homes = pd.Series(np.ones(len(choosers.index)) * -1,
                              index=choosers.index)
        for name, segment in segments:
            name = str(name)
            p = pdf['segment%s' % name]
            # The mask is label-indexed and rebuilt for every segment.
            mask = pd.Series(np.zeros(len(alts.index), dtype='bool'),
                             index=alts.index)

            print("Assigning units to %d agents of segment %s" % (len(
                segment.index), name))

            mask, new_homes = choose(p, mask, alts, segment, new_homes)

        build_cnts = new_homes.value_counts(
        )  #num households placed in each building
        print("Assigned %d agents to %d locations with %d unplaced" % (
            new_homes.size, build_cnts.size, build_cnts.get(-1, 0)))

        # Single-step .loc assignment: the chained form
        # ``table[depvar].ix[...] = ...`` can end up writing to a copy.
        simulation_table.loc[new_homes.index, depvar] = new_homes.values.astype('int32')
        table = simulation_table.ix[new_homes.index]
        out_table = pd.concat([table, out_table])
        simulation_table.loc[table.index] = table
        if (temp_count > 50):
            break
    out_table.to_csv(households_csv_path)