Example #1
0
def test_scale_to_targets_from_table_clip_int(df, target_col):
    """Scale to a mean target with clip bounds and ``int_result`` enabled.

    The target table holds a single row: metric ``'mean'``, target value
    1000, no filter (NaN), ``clip_low`` 400, ``clip_high`` 999.99 and
    ``int_result`` True. The returned frame must keep the columns of ``df``
    and its ``target_col`` must equal the expected series.
    """
    target_spec = dict(
        column_name=[target_col],
        target_value=[1000],
        target_metric=['mean'],
        filters=[np.nan],
        clip_low=[400],
        clip_high=[999.99],
        int_result=[True],
    )
    targets = pd.DataFrame(target_spec)

    scaled = scl.scale_to_targets_from_table(df, targets)

    expected = pd.Series(
        [400, 400, 545, 727, 909, 1000, 1000, 1000, 1000, 1000])
    pdt.assert_index_equal(scaled.columns, df.columns)
    pdt.assert_series_equal(scaled[target_col], expected)
Example #2
0
def test_scale_to_targets_from_table_clip_int(df, target_col):
    """Check table-driven scaling with clipping and integer output.

    One target row is supplied (mean of 1000, NaN filter, clip range
    400 to 999.99, ``int_result`` True). Asserts that the result has the
    same columns as ``df`` and that ``target_col`` matches the expected
    values.
    """
    expected_values = [400, 400, 545, 727, 909, 1000, 1000, 1000, 1000, 1000]

    single_target = {
        'column_name': [target_col],
        'target_value': [1000],
        'target_metric': ['mean'],
        'filters': [np.nan],
        'clip_low': [400],
        'clip_high': [999.99],
        'int_result': [True],
    }
    targets = pd.DataFrame(single_target)

    result = scl.scale_to_targets_from_table(df, targets)
    scaled_column = result[target_col]

    pdt.assert_index_equal(result.columns, df.columns)
    pdt.assert_series_equal(scaled_column, pd.Series(expected_values))
Example #3
0
def test_scale_to_targets_from_table(df, target_col):
    """Scale one column against two filtered ``'sum'`` targets.

    Two target rows are supplied for ``target_col`` (sums of 100 and 1000),
    each with its own filter string and with NaN for ``clip_low``,
    ``clip_high`` and ``int_result``. The result must keep the columns of
    ``df``; the scaled column is compared with dtype checking disabled.
    """
    unset = [np.nan, np.nan]
    target_spec = dict(
        column_name=[target_col, target_col],
        target_value=[100, 1000],
        target_metric=['sum', 'sum'],
        filters=['geo_id == "a",filter_col < 106', 'geo_id == "b"'],
        clip_low=list(unset),
        clip_high=list(unset),
        int_result=list(unset),
    )
    targets = pd.DataFrame(target_spec)

    scaled = scl.scale_to_targets_from_table(df, targets)

    expected = pd.Series([
        11.11111111, 66.66666667, 33.33333333, 133.33333333, 55.55555556,
        200, 7, 266.66666667, 9, 333.33333333,
    ])
    pdt.assert_index_equal(scaled.columns, df.columns)
    pdt.assert_series_equal(scaled[target_col], expected, check_dtype=False)
Example #4
0
def test_scale_to_targets_from_table(df, target_col):
    """Check table-driven scaling with two filtered sum targets.

    The target table has two rows for ``target_col``: a sum of 100 under
    the filter ``'geo_id == "a",filter_col < 106'`` and a sum of 1000 under
    ``'geo_id == "b"'``. Clip bounds and ``int_result`` are NaN. Asserts
    that columns are unchanged and that the scaled values match, ignoring
    dtype.
    """
    expected_values = [
        11.11111111, 66.66666667, 33.33333333, 133.33333333, 55.55555556,
        200, 7, 266.66666667, 9, 333.33333333,
    ]

    two_targets = {
        'column_name': [target_col, target_col],
        'target_value': [100, 1000],
        'target_metric': ['sum', 'sum'],
        'filters': ['geo_id == "a",filter_col < 106', 'geo_id == "b"'],
        'clip_low': [np.nan, np.nan],
        'clip_high': [np.nan, np.nan],
        'int_result': [np.nan, np.nan],
    }
    targets = pd.DataFrame(two_targets)

    result = scl.scale_to_targets_from_table(df, targets)
    scaled_column = result[target_col]

    pdt.assert_index_equal(result.columns, df.columns)
    pdt.assert_series_equal(
        scaled_column, pd.Series(expected_values), check_dtype=False)
    'int_result': [np.nan] * len(targetvalues)
})

# Target table for non_residential_sqft: one row per entry of `targetunits`,
# using the 'sum' metric, `targetnonressqft` as the target value, and a filter
# string that selects rows with positive non_residential_sqft whose taz equals
# that entry's index value. Clip bounds and int_result are left as NaN.
targets_non_residential_sqft = pd.DataFrame(dict(
    column_name=['non_residential_sqft'] * len(targetunits),
    target_value=targetunits.targetnonressqft.values,
    target_metric=['sum'] * len(targetunits),
    filters=(
        '(non_residential_sqft > 0) & (taz == '
        + pd.Series(targetunits.index.values).astype('str')
        + ')'
    ),
    clip_low=[np.nan] * len(targetunits),
    clip_high=[np.nan] * len(targetunits),
    int_result=[np.nan] * len(targetunits),
))

# Apply the residential year-built targets.
buildings2 = scl.scale_to_targets_from_table(
    buildings2, targets_residential_year_built)

# Index the non-residential targets by taz so they align with the per-taz
# totals of the existing non_residential_sqft in buildings2.
targets_non_residential_sqft['taz'] = targetunits.index.values
targets_non_residential_sqft = targets_non_residential_sqft.set_index('taz')
targets_non_residential_sqft['existing_nrsqft'] = buildings2.groupby(
    'taz').non_residential_sqft.sum()

# Where a target is below the existing total, raise it to the existing total.
# A single .loc assignment is used instead of chained indexing
# (frame.column[mask] = ...), so the write always lands on the frame itself
# rather than on a temporary object. The right-hand side is aligned on the
# index, so only the masked rows are modified.
targets_non_residential_sqft.loc[
    targets_non_residential_sqft.target_value
    < targets_non_residential_sqft.existing_nrsqft,
    'target_value'] = targets_non_residential_sqft.existing_nrsqft

# The helper column is dropped before the table is handed to the scaler.
del targets_non_residential_sqft['existing_nrsqft']
buildings2 = scl.scale_to_targets_from_table(
    buildings2, targets_non_residential_sqft)

# Report, per res_type, how many rows of `buildings` have building_sqft == 0.
# The parenthesised single-argument form prints the same on Python 2 and is
# also valid syntax on Python 3.
# NOTE(review): this inspects `buildings`, not the scaled `buildings2` --
# confirm that is intended.
print(buildings[buildings.building_sqft == 0].res_type.value_counts())
     'target_metric': ['mean']*len(targetvalues),
     'filters': ('(residential_units > 0) & (taz == ' + pd.Series(targetvalues.index.values).astype('str')) + ')',
     'clip_low': [np.nan]*len(targetvalues),
     'clip_high': [np.nan]*len(targetvalues),
     'int_result': [np.nan]*len(targetvalues)})

# Scaling targets for non_residential_sqft, one row per entry of
# `targetunits`. Each row asks for a 'sum' of `targetnonressqft` over the rows
# matched by its filter string (positive non_residential_sqft and taz equal to
# the entry's index value); clip bounds and int_result are NaN.
targets_non_residential_sqft = pd.DataFrame({
    'column_name': ['non_residential_sqft'] * len(targetunits),
    'target_value': targetunits['targetnonressqft'].values,
    'target_metric': ['sum'] * len(targetunits),
    'filters': (
        '(non_residential_sqft > 0) & (taz == '
        + pd.Series(targetunits.index.values).astype('str')
        + ')'
    ),
    'clip_low': [np.nan] * len(targetunits),
    'clip_high': [np.nan] * len(targetunits),
    'int_result': [np.nan] * len(targetunits),
})

# Apply the year-built targets first, then the non-residential sqft targets.
buildings2 = scl.scale_to_targets_from_table(
    buildings2, targets_residential_year_built)

buildings2 = scl.scale_to_targets_from_table(
    buildings2, targets_non_residential_sqft)

# Diagnostics on zero-square-foot rows. The parenthesised single-argument
# print form behaves the same on Python 2 and also parses on Python 3.
# NOTE(review): the first report reads `buildings`, the second `buildings2`
# -- confirm that the mix is intended.
print(buildings[buildings.building_sqft == 0].res_type.value_counts())
print(len(buildings2[(buildings2.building_sqft == 0)
                     & (buildings2.res_type == 'other')]))

# Post scaling bound-checking: pull year_built back inside
# [year_built_lower_bound, year_built_upper_bound].
# .loc[mask, column] replaces the chained form frame.column[mask] = value,
# which may assign to a temporary copy instead of the frame.
buildings2.loc[
    buildings2.year_built > year_built_upper_bound,
    'year_built'] = year_built_upper_bound
buildings2.loc[
    buildings2.year_built < year_built_lower_bound,
    'year_built'] = year_built_lower_bound


# COMPARE WITH TARGETS
# Attach per-taz totals from the scaled buildings to `targetunits`:
# residential units in 'single' and 'multi' rows, and total
# non_residential_sqft. Assignment aligns on the taz index.
targetunits['sf'] = (
    buildings2.loc[buildings2['res_type'] == 'single']
    .groupby('taz')['residential_units']
    .sum()
)
targetunits['mf'] = (
    buildings2.loc[buildings2['res_type'] == 'multi']
    .groupby('taz')['residential_units']
    .sum()
)
targetunits['nrsqft'] = (
    buildings2.groupby('taz')['non_residential_sqft'].sum()
)
Example #7
0
# Predict for the residential buildings and exponentiate the predictions
# (presumably the model was fit on a log-transformed price -- confirm against
# the specification used for `results`).
sim_data = np.exp(results.predict(resbuildings))
sim_data = pd.Series(sim_data, index=resbuildings.index)

# Initialise the price columns as floats. They receive float values below and
# in later steps; starting from integer 0 would force a dtype change on
# assignment.
buildings['res_price'] = 0.0
buildings['res_price_per_sqft'] = 0.0

# Only the rows that were predicted get a price; all others stay at 0.
buildings.loc[sim_data.index, 'res_price'] = sim_data
# Now that the regression equation is applied, scale residential prices to
# match the zonal target: one 'mean' target per entry of `targetvalues`, with
# `salepr2010_av` as the target value and a filter string selecting rows with
# residential_units > 0 whose taz equals that entry's index value.
targets_residential_price = pd.DataFrame(dict(
    column_name=['res_price'] * len(targetvalues),
    target_value=targetvalues['salepr2010_av'].values,
    target_metric=['mean'] * len(targetvalues),
    filters=(
        '(residential_units > 0) & (taz == '
        + pd.Series(targetvalues.index.values).astype('str')
        + ')'
    ),
    clip_low=[np.nan] * len(targetvalues),
    clip_high=[np.nan] * len(targetvalues),
    int_result=[np.nan] * len(targetvalues),
))
buildings = scl.scale_to_targets_from_table(
    buildings, targets_residential_price)

# Price per square foot for rows with both a positive price and a positive
# unit size; every other row keeps its current res_price_per_sqft value.
# The column is rebuilt with `where` and assigned as a whole, replacing the
# chained form frame.column[mask] = ..., which may write to a temporary copy
# and has to push float values into the existing column in place.
# The boolean masks are combined with `&` rather than `*`.
buildings['res_price_per_sqft'] = (
    buildings.res_price / buildings.sqft_per_unit
).where(
    (buildings.res_price > 0) & (buildings.sqft_per_unit > 0),
    buildings.res_price_per_sqft)

# Nonresidential price imputation
# Estimation sample: rows of type 'other' whose costar_property_type is longer
# than two characters and whose costar_rent is not one of the placeholder
# strings listed below.
# An explicit .copy() makes the sample an independent frame, so adding a
# column to it does not act on a slice of `buildings`
# (SettingWithCopyWarning).
nonresprice_estimation_dataset = buildings[
    (buildings.costar_property_type.str.len() > 2)
    & (buildings.res_type == 'other')
    & (~buildings.costar_rent.isin(['', '-', 'Negotiable', 'Withheld']))
].copy()
nonresprice_estimation_dataset['observed_costar_rent'] = (
    nonresprice_estimation_dataset.costar_rent.astype('float'))

# OLS formula for log observed rent. Adjacent string literals are concatenated
# by the parser, so this is the same single formula string, just wrapped for
# readability.
specification = (
    'np.log(observed_costar_rent) ~ non_residential_sqft + targetnonressqft'
    ' + I(development_type_id == "OF") + I(development_type_id == "RT")'
    ' + I(year_built < 1940) + I(year_built > 1990) + year_built'
    ' + mean_income + mean_hhsize + mean_hhchildren + mean_numvehicles'
    ' + mf_sf_ratio + resdensity + empdensity + nr_res_ratio'
    ' + yearbuilt_av + yearbuilt_sd + stories'
    ' + I(county_id == 1) + I(county_id == 13) + I(county_id == 41)'
    ' + I(county_id == 55) + I(county_id == 85) + I(county_id == 81)'
    ' + I(county_id == 95) + I(county_id == 97)'
    ' + e11_10 + e21_10 + e22_10 + e23_10 + e3133_10 + e42_10 + e4445_10'
    ' + e4849_10 + e51_10 + e52_10 + e53_10 + e54_10 + e55_10 + e56_10'
    ' + e61_10 + e62_10 + e71_10 + e72_10 + e81_10 + e92_10 + etot_10'
)
model = smf.ols(formula=specification, data=nonresprice_estimation_dataset)
results = model.fit()
# The parenthesised single-argument print form behaves the same on Python 2
# and also parses on Python 3.
print(results.summary())

# Apply the fitted rent model to every 'other'-type row that has
# non-residential floor space. The formula's response is np.log(...), so the
# predictions are exponentiated.
nonresbuildings = buildings.loc[
    (buildings['res_type'] == 'other')
    & (buildings['non_residential_sqft'] > 0)
]
sim_data = np.exp(results.predict(nonresbuildings))