def _allocate_households(households, persons, tract_controls):
    """Allocate PUMS household samples across census tracts.

    Balances initial PUMS household weights against tract-level marginal
    controls (via convex optimization), discretizes the resulting fractional
    weights per tract, and returns the households replicated once per tract
    with integer counts and tract ids attached.

    Args:
        households: DataFrame of PUMS household records; must contain the
            weight column and the allocation-input bin columns.
        persons: DataFrame of person records; passed through unchanged.
        tract_controls: object whose ``.data`` DataFrame holds the tract
            marginals, including a 'TRACTCE' column.

    Returns:
        Tuple of (households_extend, persons) where households_extend has
        one copy of each household per tract, with count and tract columns.
    """
    # Only take nonzero weights
    households = households[households[inputs.HOUSEHOLD_WEIGHT.name] > 0]

    # Initial weights from PUMS.
    # NOTE: .as_matrix() was removed in pandas 1.0; .to_numpy() is the
    # direct replacement and returns the same ndarray.
    w = households[inputs.HOUSEHOLD_WEIGHT.name].to_numpy().T

    allocation_inputs = [inputs.NUM_PEOPLE, inputs.NUM_VEHICLES]  # Hard-coded for now
    # Prepend column name to bin name to prevent bin collision
    hh_columns = []
    for a_input in allocation_inputs:
        subset_values = households[a_input.name].unique().tolist()
        hh_columns += HouseholdAllocator._str_broadcast(a_input.name, subset_values)
    hh_columns = HouseholdAllocator._filter_sparse_columns(households, hh_columns)

    hh_table = households[hh_columns].to_numpy()
    A = tract_controls.data[hh_columns].to_numpy()
    n_tracts, n_controls = A.shape
    n_samples = len(households.index.values)

    # Control importance weights
    # < 1 means not important (thus relaxing the constraint in the solver)
    mu = np.mat([1] * n_controls)
    w_extend = np.tile(w, (n_tracts, 1))
    mu_extend = np.mat(np.tile(mu, (n_tracts, 1)))
    # Aggregate marginals across all tracts for the meta-balancing step.
    B = np.mat(np.dot(np.ones((1, n_tracts)), A)[0])

    # Our trade-off coefficient gamma
    # Low values (~1) mean we trust our initial weights, high values
    # (~10000) mean want to fit the marginals.
    gamma = 100.

    # Meta-balancing coefficient
    meta_gamma = 100.

    hh_weights = balance_multi_cvx(
        hh_table, A, B, w_extend, gamma * mu_extend.T, meta_gamma
    )

    # We're running discretization independently for each tract
    tract_ids = tract_controls.data['TRACTCE'].values
    # Integer floor of the balanced weights plus a discretized remainder
    # gives integer household counts that respect the marginals.
    sample_weights_int = hh_weights.astype(int)
    discretized_hh_weights = discretize_multi_weights(hh_table, hh_weights)
    total_weights = sample_weights_int + discretized_hh_weights

    # Extend households and add the weights and ids
    households_extend = pandas.concat([households] * n_tracts)
    households_extend[inputs.COUNT.name] = total_weights.flatten().T
    tracts = np.repeat(tract_ids, n_samples)
    households_extend[inputs.TRACT.name] = tracts
    return households_extend, persons
def _allocate_households(households, persons, tract_controls):
    """Allocate PUMS household samples across census tracts.

    Balances initial PUMS household weights against tract-level marginal
    controls for the hard-coded household-size bins ('1', '2', '3', '4+'),
    discretizes the fractional weights per tract, and returns the
    households replicated once per tract with integer counts and tract ids.

    Args:
        households: DataFrame of PUMS household records; must contain the
            weight column and the '1'/'2'/'3'/'4+' bin columns.
        persons: DataFrame of person records; passed through unchanged.
        tract_controls: object whose ``.data`` DataFrame holds the tract
            marginals, including a 'TRACTCE' column.

    Returns:
        Tuple of (households_extend, persons) where households_extend has
        one copy of each household per tract, with 'count' and 'tract'
        columns added.
    """
    # Only take nonzero weights
    households = households[households[inputs.HOUSEHOLD_WEIGHT.name] > 0]

    # Initial weights from PUMS.
    # NOTE: .as_matrix() was removed in pandas 1.0; .to_numpy() is the
    # direct replacement and returns the same ndarray.
    w = households[inputs.HOUSEHOLD_WEIGHT.name].to_numpy().T

    hh_columns = ['1', '2', '3', '4+']

    hh_table = households[hh_columns].to_numpy()
    A = tract_controls.data[hh_columns].to_numpy()
    n_tracts, n_controls = A.shape
    n_samples = len(households.index.values)

    # Control importance weights
    # < 1 means not important (thus relaxing the constraint in the solver)
    mu = np.mat([1] * n_controls)
    w_extend = np.tile(w, (n_tracts, 1))
    mu_extend = np.mat(np.tile(mu, (n_tracts, 1)))
    # Aggregate marginals across all tracts for the meta-balancing step.
    B = np.mat(np.dot(np.ones((1, n_tracts)), A)[0])

    # Our trade-off coefficient gamma
    # Low values (~1) mean we trust our initial weights, high values
    # (~10000) mean want to fit the marginals.
    gamma = 100.

    # Meta-balancing coefficient
    meta_gamma = 100.

    # Only the balanced weights are used; the solver's auxiliary outputs
    # (relaxation terms) are discarded.
    hh_weights, _, _ = balance_multi_cvx(
        hh_table, A, B, w_extend, gamma * mu_extend.T, meta_gamma
    )

    # We're running discretization independently for each tract
    tract_ids = tract_controls.data['TRACTCE'].values
    # Integer floor of the balanced weights plus a discretized remainder
    # gives integer household counts that respect the marginals.
    sample_weights_int = hh_weights.astype(int)
    discretized_hh_weights = discretize_multi_weights(hh_table, hh_weights)
    total_weights = sample_weights_int + discretized_hh_weights

    # Extend households and add the weights and ids
    households_extend = pandas.concat([households] * n_tracts)
    households_extend['count'] = total_weights.flatten().T
    tracts = np.repeat(tract_ids, n_samples)
    households_extend['tract'] = tracts
    return households_extend, persons
def test_discretize_multi_zero_weights(self):
    """All-zero weights should discretize to the expected (zero) table."""
    table, weights, expected = self._mock_hh_weights_zeroed()
    actual = listbalancer.discretize_multi_weights(table, weights)
    np.testing.assert_array_equal(actual, expected)