def compute(self, dataset_pool):
    """Return log-affordability for every household/location pair.

    Household income is reshaped into a column of length ``get_reduced_n()``
    and the 2D housing-cost attribute is subtracted from it.  Where the
    difference is positive the result is ``ln(difference + 1)``; elsewhere
    the raw (non-positive) difference is returned unchanged.
    """
    interaction_set = self.get_dataset()
    income_column = reshape(
        interaction_set.get_attribute_of_dataset(self.hh_income),
        (interaction_set.get_reduced_n(), 1))
    housing_cost = interaction_set.get_2d_dataset_attribute(self.housing_cost)
    surplus = income_column - housing_cost
    return where(surplus > 0, ln(surplus + 1), surplus)
def test_my_inputs(self):
    """Compare the variable on a four-cell land-cover table with a hand-built expectation.

    The first two cells carry land cover type 10, which is registered as the
    'AG' constant; the expectation repeats the footprint correlation,
    threshold and distance transform independently of the variable.
    """
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='land_covers',
        table_data={
            'relative_x': array([1, 2, 1, 2]),
            'relative_y': array([1, 1, 2, 2]),
            "lct": array([10, 10, 4, 3]),
        })
    dataset_pool = DatasetPool(package_order=['biocomplexity'], storage=storage)
    footprint = array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
    dataset_pool._add_dataset('constant', {
        "FOOTPRINT": footprint,
        'AG': 10,
    })
    gridcell = dataset_pool.get_dataset('land_cover')
    gridcell.compute_variables(self.variable_name, dataset_pool=dataset_pool)
    values = gridcell.get_attribute(self.variable_name)

    should_be = array([[1, 1], [0, 0]])
    should_be = correlate(ma.filled(should_be.astype(int32), 0), footprint, mode="reflect")
    should_be = less_equal((should_be / 5.0), 400)
    should_be = ln(distance_transform_edt(should_be) + 1) / dag.standardization_constant_distance
    should_be = ravel(transpose(should_be))  # flatten by id

    # assertTrue replaces the deprecated assert_ alias (same check, same message).
    self.assertTrue(ma.allclose(values, should_be, rtol=1e-7),
                    msg="Error in " + self.variable_name)
def test_my_inputs(self):
    """Compare the variable on a four-cell land-cover table with a hand-built expectation.

    Land cover types 2 and 3 match the 'MU' and 'LU' constants listed under
    'ALL_URBAN', so the expected mask is zero for those two cells.
    """
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='land_covers',
        table_data={
            'relative_x': array([1, 2, 1, 2]),
            'relative_y': array([1, 1, 2, 2]),
            "lct": array([11, 2, 4, 3]),
        })
    dataset_pool = DatasetPool(package_order=['biocomplexity'], storage=storage)
    dataset_pool._add_dataset('constant', {
        "CELLSIZE": 30,
        "ALL_URBAN": ['HU', 'MU', 'LU'],
        'HU': 1,
        'MU': 2,
        'LU': 3,
    })
    gridcell = dataset_pool.get_dataset('land_cover')
    gridcell.compute_variables(self.variable_name, dataset_pool=dataset_pool)
    values = gridcell.get_attribute(self.variable_name)

    should_be = array([1, 0, 1, 0], dtype=float32)
    should_be = ln(30 * distance_transform_edt(should_be) + 1) / ddt1.standardization_constant_distance

    # assertTrue replaces the deprecated assert_ alias (same check, same message).
    self.assertTrue(ma.allclose(values, should_be, rtol=1e-7),
                    msg="Error in " + self.variable_name)
def test_my_inputs(self):
    """Compare the variable on a four-cell table of 'comm_add4' values with hand-computed sums.

    Each expected entry is a weighted sum of the four input values, then
    transformed with ``ln(x + 1) / 10.0``.
    """
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='land_covers',
        table_data={
            'relative_x': array([1, 2, 1, 2]),
            'relative_y': array([1, 1, 2, 2]),
            "comm_add4": array([1, 2, 5, 15]),
        })
    dataset_pool = DatasetPool(package_order=['biocomplexity'], storage=storage)
    dataset_pool._add_dataset('constant', {
        "CELLSIZE": 250,  # this results in a 3x3 grid, (750/250)x(750/250)
    })
    gridcell = dataset_pool.get_dataset('land_cover')
    gridcell.compute_variables(self.variable_name, dataset_pool=dataset_pool)
    values = gridcell.get_attribute(self.variable_name)

    should_be = array([
        1 * 4 + 2 * 2 + 5 * 2 + 15,
        1 * 2 + 2 * 4 + 5 + 15 * 2,
        1 * 2 + 2 + 5 * 4 + 15 * 2,
        1 + 2 * 2 + 5 * 2 + 15 * 4,
    ])
    should_be = ln(should_be + 1) / 10.0

    # assertTrue replaces the deprecated assert_ alias (same check, same message).
    self.assertTrue(ma.allclose(values, should_be, rtol=1e-7),
                    msg="Error in " + self.variable_name)
def test_my_inputs(self):
    """Compare the variable on a four-cell land-cover table with a hand-built expectation.

    Land cover types 2 and 3 match the 'MU' and 'LU' constants listed under
    'ALL_URBAN', so the expected mask is zero for those two cells.
    """
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='land_covers',
        table_data={
            'relative_x': array([1, 2, 1, 2]),
            'relative_y': array([1, 1, 2, 2]),
            "lct": array([11, 2, 4, 3]),
        })
    dataset_pool = DatasetPool(package_order=['biocomplexity'], storage=storage)
    dataset_pool._add_dataset('constant', {
        "CELLSIZE": 30,
        "ALL_URBAN": ['HU', 'MU', 'LU'],
        'HU': 1,
        'MU': 2,
        'LU': 3,
    })
    gridcell = dataset_pool.get_dataset('land_cover')
    gridcell.compute_variables(self.variable_name, dataset_pool=dataset_pool)
    values = gridcell.get_attribute(self.variable_name)

    should_be = array([1, 0, 1, 0], dtype=float32)
    should_be = ln(30 * distance_transform_edt(should_be) + 1) / ddt1.standardization_constant_distance

    # assertTrue replaces the deprecated assert_ alias (same check, same message).
    self.assertTrue(ma.allclose(values, should_be, rtol=1e-7),
                    msg="Error in " + self.variable_name)
def compute(self, dataset_pool):
    """Return ``ln(neighbourhood sum + 1) / 10.0`` of the ``comm_add`` attribute.

    A square all-ones footprint of ``int(footprint_width / CELLSIZE)`` cells
    per side is correlated (reflecting at the borders) with the 2D attribute
    grid, whose masked cells are filled with 0.0; the result is flattened by
    id before the log transform.
    """
    cell_width = dataset_pool.get_dataset('constants')['CELLSIZE']
    side = int(self.footprint_width / cell_width)
    window = ones((side, side), dtype="int32")
    grid = ma.filled(self.get_dataset().get_2d_attribute(self.comm_add), 0.0)
    neighbourhood_total = correlate(grid, window, mode="reflect")
    flattened = self.get_dataset().flatten_by_id(neighbourhood_total)
    return ln(flattened + 1) / 10.0
def test_my_inputs_for_hmps(self):
    """Compare 'biocomplexity.land_cover.hmps' on a four-cell table with the expected value.

    The expectation is ``ln(2 + 1) / SSSmps.standardization_constant_MPS``
    for every cell.
    """
    variable_name = "biocomplexity.land_cover.hmps"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='land_covers',
        table_data={
            'relative_x': array([1, 2, 1, 2]),
            'relative_y': array([1, 1, 2, 2]),
            "lct": array([1, 2, 1, 4]),
        })
    dataset_pool = DatasetPool(package_order=['biocomplexity'], storage=storage)
    dataset_pool._add_dataset('constant', {
        "FOOTPRINT": array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]),
        'HU': 1,
    })
    land_cover = dataset_pool.get_dataset('land_cover')
    land_cover.compute_variables(variable_name, dataset_pool=dataset_pool)
    values = land_cover.get_attribute(variable_name)

    should_be = array([2, 2, 2, 2], dtype=float32)
    should_be = ln(should_be + 1) / SSSmps.standardization_constant_MPS

    # assertTrue replaces the deprecated assert_ alias (same check, same message).
    self.assertTrue(ma.allclose(values, should_be, rtol=1e-7),
                    msg="Error in " + variable_name)
def _run_stochastic_test_poisson(self, function, expected_results, number_of_iterations,
                                 significance_level=0.01, transformation=None):
    """
    Call ``function`` ``number_of_iterations`` times and test its (optionally
    transformed) results against ``expected_results``.

    A likelihood-ratio statistic (LRTS) is built from the per-element means of
    the runs and the expected values, and ``chisqprob(LRTS, K)`` gives its
    probability, with ``K`` the number of expected values.  Elements whose
    expected value is zero are masked out of the ratio.

    Returns a tuple ``(passed, message)`` where ``passed`` is true when the
    probability is at least ``significance_level``.
    """
    K = expected_results.size
    running_total = zeros(K, dtype=float32)
    draws = zeros((number_of_iterations, K), dtype=float32)
    for run_number in range(number_of_iterations):
        outcome = function()
        draws[run_number, :] = try_transformation(outcome, transformation)
        running_total = running_total + draws[run_number, :]

    observed_rate = running_total / float(number_of_iterations)
    expected_rate = try_transformation(expected_results.astype(float32), transformation)
    column_totals = sum(draws, axis=0)

    rate_gap = number_of_iterations * (expected_rate - observed_rate).sum()
    nonzero_expected = ma.masked_where(expected_rate == 0, expected_rate)
    log_ratio_term = (ln(observed_rate / nonzero_expected) * column_totals).sum()
    LRTS = 2.0 * (rate_gap + log_ratio_term)
    prob = chisqprob(LRTS, K)

    logger.log_status("Stochastic Test Poisson: LRTS=" + str(LRTS) + ", df=",
                      str(K),
                      ", p=" + str(prob))
    passed = prob >= significance_level
    message = "prob=%f < significance level of %f" % (prob, significance_level)
    return (passed, message)
def test_my_inputs(self):
    """Compare the variable on a four-cell table of 'comm_add4' values with hand-computed sums.

    Each expected entry is a weighted sum of the four input values, then
    transformed with ``ln(x + 1) / 10.0``.
    """
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='land_covers',
        table_data={
            'relative_x': array([1, 2, 1, 2]),
            'relative_y': array([1, 1, 2, 2]),
            "comm_add4": array([1, 2, 5, 15]),
        })
    dataset_pool = DatasetPool(package_order=['biocomplexity'], storage=storage)
    dataset_pool._add_dataset('constant', {
        "CELLSIZE": 250,  # this results in a 3x3 grid, (750/250)x(750/250)
    })
    gridcell = dataset_pool.get_dataset('land_cover')
    gridcell.compute_variables(self.variable_name, dataset_pool=dataset_pool)
    values = gridcell.get_attribute(self.variable_name)

    should_be = array([
        1 * 4 + 2 * 2 + 5 * 2 + 15,
        1 * 2 + 2 * 4 + 5 + 15 * 2,
        1 * 2 + 2 + 5 * 4 + 15 * 2,
        1 + 2 * 2 + 5 * 2 + 15 * 4,
    ])
    should_be = ln(should_be + 1) / 10.0

    # assertTrue replaces the deprecated assert_ alias (same check, same message).
    self.assertTrue(ma.allclose(values, should_be, rtol=1e-7),
                    msg="Error in " + self.variable_name)
def compute(self, dataset_pool):
    """Return ``ln(neighbourhood sum + 1) / 10.0`` of the ``comm_add`` attribute.

    A square all-ones footprint of ``int(footprint_width / CELLSIZE)`` cells
    per side is correlated (reflecting at the borders) with the 2D attribute
    grid, whose masked cells are filled with 0.0; the result is flattened by
    id before the log transform.
    """
    cell_width = dataset_pool.get_dataset('constants')['CELLSIZE']
    side = int(self.footprint_width / cell_width)
    window = ones((side, side), dtype="int32")
    grid = ma.filled(self.get_dataset().get_2d_attribute(self.comm_add), 0.0)
    neighbourhood_total = correlate(grid, window, mode="reflect")
    flattened = self.get_dataset().flatten_by_id(neighbourhood_total)
    return ln(flattened + 1) / 10.0
def test_my_inputs_for_hmps(self):
    """Compare 'biocomplexity.land_cover.hmps' on a four-cell table with the expected value.

    The expectation is ``ln(2 + 1) / SSSmps.standardization_constant_MPS``
    for every cell.
    """
    variable_name = "biocomplexity.land_cover.hmps"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='land_covers',
        table_data={
            'relative_x': array([1, 2, 1, 2]),
            'relative_y': array([1, 1, 2, 2]),
            "lct": array([1, 2, 1, 4]),
        })
    dataset_pool = DatasetPool(package_order=['biocomplexity'], storage=storage)
    dataset_pool._add_dataset('constant', {
        "FOOTPRINT": array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]),
        'HU': 1,
    })
    land_cover = dataset_pool.get_dataset('land_cover')
    land_cover.compute_variables(variable_name, dataset_pool=dataset_pool)
    values = land_cover.get_attribute(variable_name)

    should_be = array([2, 2, 2, 2], dtype=float32)
    should_be = ln(should_be + 1) / SSSmps.standardization_constant_MPS

    # assertTrue replaces the deprecated assert_ alias (same check, same message).
    self.assertTrue(ma.allclose(values, should_be, rtol=1e-7),
                    msg="Error in " + variable_name)
def compute(self, dataset_pool):
    """Return log-affordability for every household/location pair.

    Household income is reshaped into a column of length ``get_reduced_n()``
    and the 2D housing-cost attribute is subtracted from it.  Where the
    difference is positive the result is ``ln(difference + 1)``; elsewhere
    the raw (non-positive) difference is returned unchanged.
    """
    interaction_set = self.get_dataset()
    income_column = reshape(
        interaction_set.get_attribute_of_dataset(self.hh_income),
        (interaction_set.get_reduced_n(), 1))
    housing_cost = interaction_set.get_2d_dataset_attribute(self.housing_cost)
    surplus = income_column - housing_cost
    return where(surplus > 0, ln(surplus + 1), surplus)
def test_my_inputs(self):
    """Compute the variable for three buildings and compare with ``ln`` of their land values."""
    building_attributes = {"land_value": array([10, 5, 0])}
    toolbox = VariableTestToolbox()
    values = toolbox.compute_variable(
        self.variable_name,
        {"building": building_attributes},
        dataset="building")
    expected = ln(array([10, 5, 0]))
    is_close = ma.allclose(values, expected, rtol=1e-2)
    self.assertEqual(is_close, True, msg="Error in " + self.variable_name)
def compute(self, dataset_pool):
    """Return the standardized log distance derived from the 'AG' land cover type.

    Cells whose land cover equals the 'AG' constant are passed, with the
    'FOOTPRINT' constant, to ``_compute_patch_size_of_cover_types``.  Cells
    whose resulting value is at most 400 form the mask handed to the
    Euclidean distance transform; the distances are transformed with
    ``ln(d + 1) / standardization_constant_distance`` and flattened by id.
    """
    consts = dataset_pool.get_dataset('constants')
    ag_code = consts["AG"]
    cover_grid = ma.filled(
        self.get_dataset().get_2d_attribute(self.land_cover_type), 0)
    kernel = consts['FOOTPRINT']
    patch_size = self._compute_patch_size_of_cover_types(cover_grid == ag_code, kernel)
    within_limit = less_equal(patch_size, 400).astype(bool8)
    edt = distance_transform_edt(within_limit)
    scaled = ln(edt + 1) / self.standardization_constant_distance
    return self.get_dataset().flatten_by_id(scaled)
def compute(self, dataset_pool):
    """Return the standardized log distance to the nearest urban cell as float32.

    The constant 'ALL_URBAN' lists the names of the constants holding the
    urban land cover codes.  Cells matching any of those codes are marked,
    the Euclidean distance transform is applied to the unmarked cells and
    scaled by 'CELLSIZE', and the result is transformed with
    ``ln(d + 1) / standardization_constant_distance`` and flattened by id.
    """
    consts = dataset_pool.get_dataset('constants')
    cell_width = consts["CELLSIZE"]
    urban_codes = [consts[name] for name in consts["ALL_URBAN"]]
    cover_grid = ma.filled(
        self.get_dataset().get_2d_attribute(self.land_cover_type), 0)
    urban_hits = zeros(shape=cover_grid.shape, dtype="int32")
    for code in urban_codes:
        urban_hits += equal(cover_grid, code)
    not_urban = logical_not(urban_hits)
    metres = cell_width * (distance_transform_edt(not_urban))
    scaled = ln(metres + 1) / self.standardization_constant_distance
    return self.get_dataset().flatten_by_id(scaled).astype(float32)
def compute(self, dataset_pool):
    """Return the standardized log distance derived from the 'AG' land cover type.

    Cells whose land cover equals the 'AG' constant are passed, with the
    'FOOTPRINT' constant, to ``_compute_patch_size_of_cover_types``.  Cells
    whose resulting value is at most 400 form the mask handed to the
    Euclidean distance transform; the distances are transformed with
    ``ln(d + 1) / standardization_constant_distance`` and flattened by id.
    """
    consts = dataset_pool.get_dataset('constants')
    ag_code = consts["AG"]
    cover_grid = ma.filled(
        self.get_dataset().get_2d_attribute(self.land_cover_type), 0)
    kernel = consts['FOOTPRINT']
    patch_size = self._compute_patch_size_of_cover_types(cover_grid == ag_code, kernel)
    within_limit = less_equal(patch_size, 400).astype(bool8)
    edt = distance_transform_edt(within_limit)
    scaled = ln(edt + 1) / self.standardization_constant_distance
    return self.get_dataset().flatten_by_id(scaled)
def compute(self, dataset_pool):
    """Return the standardized log patch size for the cover types of interest.

    ``_cover_type_translation(constants)`` supplies the land cover codes; a
    cell is selected when its land cover equals any of them.  The selection
    and the 'FOOTPRINT' constant go to ``_compute_patch_size_of_cover_types``
    and the result is transformed with
    ``ln(x + 1) / standardization_constant_MPS`` and flattened by id.
    """
    consts = dataset_pool.get_dataset('constants')
    wanted_codes = self._cover_type_translation(consts)
    cover_grid = ma.filled(
        self.get_dataset().get_2d_attribute(self.land_cover_type), 0)
    kernel = consts['FOOTPRINT']

    # OR the per-code matches together, starting from an all-zero int32 grid.
    selected = zeros(shape=cover_grid.shape, dtype=int32)
    for code in wanted_codes:
        selected = logical_or(selected, cover_grid == code)

    patch_size = self._compute_patch_size_of_cover_types(selected, kernel)
    scaled = ln(patch_size + 1) / self.standardization_constant_MPS
    return self.get_dataset().flatten_by_id(scaled)
def test_my_inputs(self):
    """Compare the variable on a 3x3 household/gridcell interaction with hand-computed values.

    Each expected entry is ``ln(income - housing_cost + 1)`` where income
    exceeds the cost, and the raw difference otherwise.
    """
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='gridcells',
        table_data={
            'grid_id': array([1, 2, 3]),
            'housing_cost': array([42, 84, 2]),
        })
    storage.write_table(
        table_name='households',
        table_data={
            'household_id': array([1, 2, 3]),
            'income': array([42, 77, 99]),
        })
    dataset_pool = DatasetPool(package_order=['urbansim'], storage=storage)
    household_x_gridcell = dataset_pool.get_dataset('household_x_gridcell')
    household_x_gridcell.compute_variables(self.variable_name, dataset_pool=dataset_pool)
    values = household_x_gridcell.get_attribute(self.variable_name)

    should_be = array([[0, -42, ln(41)],
                       [ln(36), -7, ln(76)],
                       [ln(58), ln(16), ln(98)]])

    # assertTrue replaces the deprecated assert_ alias (same check, same message).
    self.assertTrue(ma.allclose(values, should_be, rtol=1e-7),
                    msg="Error in " + self.variable_name)
def test_my_inputs(self):
    """Compare the variable on a 3x3 household/gridcell interaction with hand-computed values.

    Each expected entry is ``ln(income - housing_cost + 1)`` where income
    exceeds the cost, and the raw difference otherwise.
    """
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='gridcells',
        table_data={
            'grid_id': array([1, 2, 3]),
            'housing_cost': array([42, 84, 2]),
        })
    storage.write_table(
        table_name='households',
        table_data={
            'household_id': array([1, 2, 3]),
            'income': array([42, 77, 99]),
        })
    dataset_pool = DatasetPool(package_order=['urbansim'], storage=storage)
    household_x_gridcell = dataset_pool.get_dataset('household_x_gridcell')
    household_x_gridcell.compute_variables(self.variable_name, dataset_pool=dataset_pool)
    values = household_x_gridcell.get_attribute(self.variable_name)

    should_be = array([[0, -42, ln(41)],
                       [ln(36), -7, ln(76)],
                       [ln(58), ln(16), ln(98)]])

    # assertTrue replaces the deprecated assert_ alias (same check, same message).
    self.assertTrue(ma.allclose(values, should_be, rtol=1e-7),
                    msg="Error in " + self.variable_name)
def test_my_inputs(self):
    """Compute the variable for three zones and compare with ``ln`` of their average values.

    The unused fixture arrays (avg_value, some_gridcell_zone_ids, grid_id)
    that the test never passed to the variable have been removed.
    """
    values = VariableTestToolbox().compute_variable(
        self.variable_name,
        {
            "zone": {
                "zone_id": array([1, 2, 3]),
                "avg_val_per_unit_commercial": array([10, 5, 0]),
            },
        },
        dataset="zone")
    should_be = ln(array([10, 5, 0]))
    self.assertEqual(ma.allclose(values, should_be, rtol=1e-2), True,
                     msg="Error in " + self.variable_name)
def compute(self, dataset_pool):
    """Return the standardized log distance to the nearest urban cell as float32.

    The constant 'ALL_URBAN' lists the names of the constants holding the
    urban land cover codes.  Cells matching any of those codes are marked,
    the Euclidean distance transform is applied to the unmarked cells and
    scaled by 'CELLSIZE', and the result is transformed with
    ``ln(d + 1) / standardization_constant_distance`` and flattened by id.
    """
    consts = dataset_pool.get_dataset('constants')
    cell_width = consts["CELLSIZE"]
    urban_codes = [consts[name] for name in consts["ALL_URBAN"]]
    cover_grid = ma.filled(
        self.get_dataset().get_2d_attribute(self.land_cover_type), 0)
    urban_hits = zeros(shape=cover_grid.shape, dtype="int32")
    for code in urban_codes:
        urban_hits += equal(cover_grid, code)
    not_urban = logical_not(urban_hits)
    metres = cell_width * (distance_transform_edt(not_urban))
    scaled = ln(metres + 1) / self.standardization_constant_distance
    return self.get_dataset().flatten_by_id(scaled).astype(float32)
def compute(self, dataset_pool):
    """Return the standardized log patch size for the cover types of interest.

    ``_cover_type_translation(constants)`` supplies the land cover codes; a
    cell is selected when its land cover equals any of them.  The selection
    and the 'FOOTPRINT' constant go to ``_compute_patch_size_of_cover_types``
    and the result is transformed with
    ``ln(x + 1) / standardization_constant_MPS`` and flattened by id.
    """
    consts = dataset_pool.get_dataset('constants')
    wanted_codes = self._cover_type_translation(consts)
    cover_grid = ma.filled(
        self.get_dataset().get_2d_attribute(self.land_cover_type), 0)
    kernel = consts['FOOTPRINT']

    # OR the per-code matches together, starting from an all-zero int32 grid.
    selected = zeros(shape=cover_grid.shape, dtype=int32)
    for code in wanted_codes:
        selected = logical_or(selected, cover_grid == code)

    patch_size = self._compute_patch_size_of_cover_types(selected, kernel)
    scaled = ln(patch_size + 1) / self.standardization_constant_MPS
    return self.get_dataset().flatten_by_id(scaled)
def _run_stochastic_test_poisson(self, function, expected_results, number_of_iterations,
                                 significance_level=0.01, transformation=None):
    """
    Call ``function`` ``number_of_iterations`` times and test its (optionally
    transformed) results against ``expected_results``.

    A likelihood-ratio statistic (LRTS) is built from the per-element means of
    the runs and the expected values, and ``chisqprob(LRTS, K)`` gives its
    probability, with ``K`` the number of expected values.  Elements whose
    expected value is zero are masked out of the ratio.

    Returns a tuple ``(passed, message)`` where ``passed`` is true when the
    probability is at least ``significance_level``.
    """
    K = expected_results.size
    running_total = zeros(K, dtype=float32)
    draws = zeros((number_of_iterations, K), dtype=float32)
    for run_number in range(number_of_iterations):
        outcome = function()
        draws[run_number, :] = try_transformation(outcome, transformation)
        running_total = running_total + draws[run_number, :]

    observed_rate = running_total / float(number_of_iterations)
    expected_rate = try_transformation(expected_results.astype(float32), transformation)
    column_totals = sum(draws, axis=0)

    rate_gap = number_of_iterations * (expected_rate - observed_rate).sum()
    nonzero_expected = ma.masked_where(expected_rate == 0, expected_rate)
    log_ratio_term = (ln(observed_rate / nonzero_expected) * column_totals).sum()
    LRTS = 2.0 * (rate_gap + log_ratio_term)
    prob = chisqprob(LRTS, K)

    logger.log_status("Stochastic Test Poisson: LRTS=" + str(LRTS) + ", df=",
                      str(K),
                      ", p=" + str(prob))
    passed = prob >= significance_level
    message = "prob=%f < significance level of %f" % (prob, significance_level)
    return (passed, message)
def test_my_inputs(self):
    """Compare the variable on a four-cell land-cover table with a hand-built expectation.

    The first two cells carry land cover type 10, which is registered as the
    'AG' constant; the expectation repeats the footprint correlation,
    threshold and distance transform independently of the variable.
    """
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='land_covers',
        table_data={
            'relative_x': array([1, 2, 1, 2]),
            'relative_y': array([1, 1, 2, 2]),
            "lct": array([10, 10, 4, 3]),
        })
    dataset_pool = DatasetPool(package_order=['biocomplexity'], storage=storage)
    footprint = array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
    dataset_pool._add_dataset('constant', {
        "FOOTPRINT": footprint,
        'AG': 10,
    })
    gridcell = dataset_pool.get_dataset('land_cover')
    gridcell.compute_variables(self.variable_name, dataset_pool=dataset_pool)
    values = gridcell.get_attribute(self.variable_name)

    should_be = array([[1, 1], [0, 0]])
    should_be = correlate(ma.filled(should_be.astype(int32), 0), footprint, mode="reflect")
    should_be = less_equal((should_be / 5.0), 400)
    should_be = ln(distance_transform_edt(should_be) + 1) / dag.standardization_constant_distance
    should_be = ravel(transpose(should_be))  # flatten by id

    # assertTrue replaces the deprecated assert_ alias (same check, same message).
    self.assertTrue(ma.allclose(values, should_be, rtol=1e-7),
                    msg="Error in " + self.variable_name)
def run(self, data, upc_sequence, resources=None):
    """Estimate MNL coefficients under capacity constraints with an outer/inner loop.

    Each outer iteration re-estimates the model with ``bhhh_mnl_estimation``,
    derives demand and supply/demand ratios per alternative, calls
    ``self.inner_loop`` to obtain the constrained locations and omega, and
    writes ``ln(pi)`` into the last column of ``data``.  The loop stops when
    consecutive ``pi`` arrays agree within the session's "CLOSE" tolerance.
    Diagnostics are appended to the session's "info_file".

    Arguments:
        data -- 3D array of shape (N, neqs, V); a column for ln(pi) is
            appended during the first iteration.
        upc_sequence -- supplies ``probability_class`` and ``utility_class``.
        resources -- must provide "_model_" and "capacity_string"; optional
            keys read here are "max_iterations" (default 100), "index",
            "aggregate_to_dataset" and "coefficient_names".  Despite the
            ``None`` default it is dereferenced unconditionally.

    Returns the result of the last estimation with key 'pi' added.

    Raises RuntimeError when ``max_iterations`` is reached without convergence.

    Changes relative to the previous version: the info file is closed and the
    logger block is ended on every exit path (try/finally instead of a bare
    ``except: pass``; previously the block stayed open on the convergence
    return and the file leaked on errors), the ``raise`` uses the call form
    valid in Python 2 and 3, and the debug imports run once, not per iteration.

    NOTE(review): this method was restored from text that had lost its line
    breaks and indentation; block nesting was rebuilt from the logic.  Please
    confirm against version control, in particular the position of
    ``probability = normalize(prob_hat)`` and of the newline/flush after the
    swing report.
    NOTE(review): ``prob_hat`` and ``probability_old`` are only assigned from
    the second iteration onward, so convergence on the very first iteration
    would raise NameError in the correlation report -- confirm whether that
    case can occur.
    """
    from numpy import histogram
    from opus_core.misc import unique

    self.mnl_probabilities = upc_sequence.probability_class
    self.bhhh_estimation = bhhh_mnl_estimation()

    modified_upc_sequence = UPCFactory().get_model(
        utilities=None, probabilities="opus_core.mnl_probabilities", choices=None)
    modified_upc_sequence.utility_class = upc_sequence.utility_class

    N, neqs, V = data.shape

    max_iter = resources.get("max_iterations", 100)  # default
    sc = SessionConfiguration()
    dataset_pool = sc.get_dataset_pool()
    sample_rate = dataset_pool.get_dataset("sample_rate")

    CLOSE = sc["CLOSE"]
    info_filename = sc["info_file"]
    info_filename = os.path.join('.', info_filename)
    info_file = open(info_filename, "a")
    try:
        constraint_dict = {1: 'constrained', 0: 'unconstrained'}
        swing_cases_fix = 0  # set swing alternatives to constrained (1) or unconstrained (0)

        choice_set = resources['_model_'].choice_set
        J = choice_set.size()
        alt_id = choice_set.get_id_attribute()
        movers = choice_set.get_attribute('movers')

        resources.check_obligatory_keys(["capacity_string"])
        supply = choice_set.get_attribute(resources["capacity_string"])

        index = resources.get("index", None)
        if index is None:  # no sampling case, alternative set is the full choice_set
            index = arange(J)
        if index.ndim <= 1:
            index = repeat(index[newaxis, :], N, axis=0)

        if resources.get('aggregate_to_dataset', None):
            aggregate_dataset = dataset_pool.get_dataset(resources.get('aggregate_to_dataset'))
            choice_set_aggregate_id = choice_set.get_attribute(aggregate_dataset.get_id_name()[0])
            index = aggregate_dataset.get_id_index(
                choice_set_aggregate_id[index].ravel()).reshape(index.shape)

            supply = aggregate_dataset.get_attribute(resources["capacity_string"])
            J = aggregate_dataset.size()
            movers = aggregate_dataset.get_attribute("movers")

        demand_history = movers[:, newaxis]
        resources.merge({"index": index})

        pi = ones(index.shape, dtype=float32)  # initialize pi

        logger.start_block('Outer Loop')
        try:
            for i in range(max_iter):
                logger.log_status('Outer Loop Iteration %s' % i)

                result = self.bhhh_estimation.run(data, modified_upc_sequence, resources)
                # Drop the estimator and force a collection before creating a fresh one.
                del self.bhhh_estimation
                collect()
                self.bhhh_estimation = bhhh_mnl_estimation()

                probability = modified_upc_sequence.get_probabilities()
                if data.shape[2] == V:  # insert a placeholder for ln(pi) in data
                    data = concatenate((data, ones((N, neqs, 1), dtype=float32)), axis=2)
                    coef_names = resources.get("coefficient_names")
                    coef_names = concatenate((coef_names, array(["ln_pi"])))
                    resources.merge({"coefficient_names": coef_names})
                else:
                    beta_ln_pi = result['estimators'][where(coef_names == 'ln_pi')][0]
                    logger.log_status("mu = 1/%s = %s" % (beta_ln_pi, 1/beta_ln_pi))

                    prob_hat = safe_array_divide(probability, pi ** beta_ln_pi)
                    prob_hat_sum = prob_hat.sum(axis=1, dtype=float32)
                    if not ma.allclose(prob_hat_sum, 1.0):
                        logger.log_status("probability doesn't sum up to 1, with minimum %s, and maximum %s" %
                                          (prob_hat_sum.min(), prob_hat_sum.max()))
                        probability = normalize(prob_hat)

                demand = self.mnl_probabilities.get_demand(index, probability, J) * 1 / sample_rate
                demand_history = concatenate((demand_history, demand[:, newaxis]), axis=1)

                sdratio = safe_array_divide(supply, demand, return_value_if_denominator_is_zero=2.0)
                sdratio_matrix = sdratio[index]

                ## debug info
                cc = histogram(index.ravel(), unique(index.ravel()))[0]
                logger.log_status("=================================================================")
                logger.log_status("Probability min: %s, max: %s" % (probability.min(), probability.max()))
                logger.log_status("Demand min: %s, max: %s" % (demand.min(), demand.max()))
                logger.log_status("sdratio min: %s, max: %s" % (sdratio.min(), sdratio.max()))
                logger.log_status("demand[sdratio==sdratio.min()]=%s" % demand[sdratio==sdratio.min()])
                logger.log_status("demand[sdratio==sdratio.max()]=%s" % demand[sdratio==sdratio.max()])
                logger.log_status("Counts of unique submarkets in alternatives min: %s, max: %s" % (cc.min(), cc.max()))
                logger.log_status("=================================================================")

                constrained_locations_matrix, omega, info = self.inner_loop(
                    supply, demand, probability,
                    index, sdratio_matrix, J, max_iteration=max_iter)

                inner_iterations, constrained_locations_history, swing_index, average_omega_history = info

                for idx in swing_index:
                    logger.log_status("swinging alt with id %s set to %s" % (alt_id[idx], constraint_dict[swing_cases_fix]))
                    constrained_locations_matrix[index==idx] = swing_cases_fix

                if swing_index.size > 0:
                    info_file.write("swing of constraints found with id %s \n" % alt_id[swing_index])
                    info_file.write("outer_iteration, %i, " % i + ", ".join([str(i)]*(len(inner_iterations))) + "\n")
                    info_file.write("inner_iteration, , " + ", ".join(inner_iterations) + "\n")
                    info_file.write("id, sdratio, " + ", ".join(["avg_omega"]*len(inner_iterations)) + "\n")
                    for idx in swing_index:
                        line = str(alt_id[idx]) + ','
                        line += str(sdratio[idx]) + ','
                        line += ",".join([str(x) for x in average_omega_history[idx,]])
                        line += "\n"
                        info_file.write(line)

                    info_file.write("\n")
                    info_file.flush()

                outer_iterations = [str(i)] * len(inner_iterations)
                prob_min = [str(probability.min())] * len(inner_iterations)
                prob_max = [str(probability.max())] * len(inner_iterations)

                pi_new = self.mnl_probabilities.get_pi(sdratio_matrix, omega, constrained_locations_matrix)

                data[:, :, -1] = ln(pi_new)

                # diagnostic output
                if not ma.allclose(pi, pi_new, atol=CLOSE):
                    if i > 0:  # don't print this for the first iteration
                        logger.log_status("min of abs(pi(l+1) - pi(l)): %s" % absolute(pi_new - pi).min())
                        logger.log_status("max of abs(pi(l+1) - pi(l)): %s" % absolute(pi_new - pi).max())
                        logger.log_status("mean of pi(l+1) - pi(l): %s" % (pi_new - pi).mean())
                        logger.log_status('Standard Deviation pi(l+1) - pi(l): %s' % standard_deviation(pi_new - pi))
                        logger.log_status('correlation of pi(l+1) and pi(l): %s' % corr(pi_new.ravel(), pi.ravel())[0,1])

                    pi = pi_new
                    probability_old = probability  # keep probability of the previous loop, for statistics computation only
                else:  # convergence criterion achieved, quiting outer loop
                    logger.log_status("pi(l) == pi(l+1): Convergence criterion achieved")

                    info_file.write("\nConstrained Locations History:\n")
                    info_file.write("outer_iteration," + ",".join(outer_iterations) + "\n")
                    info_file.write("inner_iteration," + ",".join(inner_iterations) + "\n")
                    info_file.write("minimum_probability," + ",".join(prob_min) + "\n")
                    info_file.write("maximum_probability," + ",".join(prob_max) + "\n")
                    for row in range(J):
                        line = [str(x) for x in constrained_locations_history[row,]]
                        info_file.write(str(alt_id[row]) + "," + ",".join(line) + "\n")

                    info_file.flush()

                    info_file.write("\nDemand History:\n")
                    i_str = [str(x) for x in range(i)]
                    info_file.write("outer_iteration, (movers)," + ",".join(i_str) + "\n")
                    for row in range(J):
                        line = [str(x) for x in demand_history[row,]]
                        info_file.write(str(alt_id[row]) + "," + ",".join(line) + "\n")

                    demand_history_info_criteria = [500, 100, 50, 20]
                    for criterion in demand_history_info_criteria:
                        com_rows_index = where(movers <= criterion)[0]
                        info_file.write("\nDemand History for alternatives with less than or equal to %s movers in 1998:\n" % criterion)
                        i_str = [str(x) for x in range(i)]
                        info_file.write("outer_iteration, (movers)," + ",".join(i_str) + "\n")
                        for row in com_rows_index:
                            line = [str(x) for x in demand_history[row,]]
                            info_file.write(str(alt_id[row]) + "," + ",".join(line) + "\n")

                    # export prob correlation history
                    correlation_indices, prob_correlation = self.compute_prob_correlation(
                        probability_old, probability, prob_hat, index, resources)

                    info_file.write("\nCorrelation of Probabilities:\n")
                    c_name = ['corr(p_ij p~_ij)', 'corr(p_ij p^_ij)', 'corr(p_ij dummy)', 'corr(p~_ij p^_ij)', 'corr(p~_ij dummy)', 'corr(p^_ij dummy)']

                    info_file.write("com_id, " + ",".join(c_name) + "\n")
                    for row in range(correlation_indices.size):
                        line = [str(x) for x in prob_correlation[row,]]
                        info_file.write(str(alt_id[correlation_indices[row]]) + "," + ",".join(line) + "\n")

                    result['pi'] = pi
                    return result
        finally:
            # Runs on convergence, on exhaustion and on errors alike.
            logger.end_block()

        raise RuntimeError("max iteration reached without convergence.")
    finally:
        info_file.close()
def run(self, data, upc_sequence, resources=None):
    """Estimate MNL coefficients under capacity constraints in a single loop.

    After an initial unconstrained estimation, each iteration computes ``pi``
    from the supply/demand ratios, omega and the constrained locations,
    writes ``ln(pi)`` into the last column of ``data``, re-estimates, and
    recomputes demand, constraints and omega.  Iteration stops when omega or
    the constrained-locations matrix is unchanged from the previous
    iteration, or after "max_iterations" (default 100).

    Arguments:
        data -- 3D array; a column for ln(pi) is appended to the last axis.
        upc_sequence -- supplies ``probability_class`` and ``utility_class``.
        resources -- must provide "capacity"; optional keys read here are
            "max_iterations", "index" and "coefficient_names".  Despite the
            ``None`` default it is dereferenced unconditionally.

    Returns the result of the last estimation.

    Fixes relative to the previous version:
      * the convergence test referenced the undefined name
        ``prev_constrained_ex_ante``; it now uses
        ``prev_constrained_locations_matrix``;
      * ``prev_omega`` / ``prev_constrained_locations_matrix`` are refreshed
        every iteration, so the test compares with the previous iteration
        instead of the initial state;
      * ``index`` / ``max_iter`` are compared with ``is None`` (``== None``
        is elementwise on arrays);
      * a 1D index is repeated along ``axis=0`` to give one row per
        observation (without an axis, ``repeat`` flattens the array);
      * diagnostics use a print form valid in Python 2 and 3 with the same
        output text; the unused ``probability_0`` was removed.

    NOTE(review): this method was restored from text that had lost its line
    breaks and indentation; block nesting was rebuilt from the logic (for
    example, the ln(pi) column is appended to ``data`` unconditionally).
    Please confirm against version control.
    """
    def _show(label, value):
        # Same text as the original two-item print statement: "label value".
        print("%s %s" % (label, value))

    CLOSE = 0.01
    # WARNING: THE SCALING OF DEMAND IS HARD CODED AND NEEDS TO BE MADE AN ARGUMENT
    # scale demand to represent 100% from a 0.2% sample
    demand_scale = 50

    self.mnl_probabilities = upc_sequence.probability_class
    self.bhhh_estimation = bhhh_mnl_estimation()

    modified_upc_sequence = UPCFactory().get_model(
        utilities=None, probabilities="opus_core.mnl_probabilities", choices=None)
    modified_upc_sequence.utility_class = upc_sequence.utility_class

    result = self.bhhh_estimation.run(data, modified_upc_sequence, resources)

    probability = modified_upc_sequence.get_probabilities()

    resources.check_obligatory_keys(["capacity"])

    supply = resources["capacity"]

    if not isinstance(supply, ndarray):
        supply = array(supply)
    nsupply = supply.size

    max_iter = resources.get("max_iterations", None)
    if max_iter is None:
        max_iter = 100  # default

    index = resources.get("index", None)
    if index is None:
        index = arange(nsupply)

    neqs = probability.shape[1]
    nobs = probability.shape[0]

    if index.ndim <= 1:
        # One row of alternative indices per observation.
        index = repeat(reshape(index, (1, index.shape[0])), nobs, axis=0)
    resources.merge({"index": index})

    demand = self.mnl_probabilities.get_demand(index, probability, nsupply) * demand_scale

    # initial calculations
    sdratio = ma.filled(supply / ma.masked_where(demand == 0, demand), 2.0)
    sdratio = _round(sdratio, 1.0, atol=CLOSE)
    constrained_locations = logical_and(sdratio < 1.0, demand - supply > CLOSE).astype("int8")
    unconstrained_locations = 1 - constrained_locations

    excess_demand = (demand - supply) * constrained_locations
    global_excess_demand = excess_demand.sum()

    sdratio_matrix = sdratio[index]
    constrained_locations_matrix = constrained_locations[index]
    constrained_ex_ante = constrained_locations_matrix

    logger.log_status('Global excess demand:', global_excess_demand)

    unconstrained_locations_matrix = unconstrained_locations[index]

    omega = self.mnl_probabilities.get_omega(probability, constrained_locations_matrix,
                                             unconstrained_locations_matrix, sdratio_matrix)
    omega = _round(omega, 1.0, CLOSE)

    _show('Num of constrainted locations: ', constrained_locations.sum())
    _show('Num of unconstrainted locations: ', unconstrained_locations.sum())
    _show('Min Ex Ante Constraints:', min(constrained_ex_ante.sum(axis=1)))
    _show('Max Ex Ante Constraints:', max(constrained_ex_ante.sum(axis=1)))
    _show('Minimum Omega', min(omega))
    _show('Maximum Omega', max(omega))
    _show('Mean Omega:', mean(omega))
    _show('Median Omega:', median(omega))
    _show('Sum Omega:', omega.sum())
    _show('Standard Deviation Omega:', standard_deviation(omega))
    _show('Count of Negative Omega', (where(omega < 0, 1, 0).sum()))
    _show('Count of Omega < 1', (where(omega < 1, 1, 0).sum()))
    _show('Count of Omega > 2', (where(omega > 2, 1, 0).sum()))
    _show('Count of Omega > 4', (where(omega > 4, 1, 0).sum()))

    average_omega = self.mnl_probabilities.get_average_omega(omega, probability, index, nsupply, nobs, demand)
    average_omega = _round(average_omega, 1.0, CLOSE)

    coef_names = resources.get("coefficient_names", None)
    if coef_names is not None:
        coef_names = array(coef_names.tolist() + ["ln_pi"])
        resources.merge({"coefficient_names": coef_names})

    # Placeholder column that receives ln(pi) at the start of every iteration.
    data = concatenate((data, ones((nobs, neqs, 1), dtype=float32)), axis=2)

    prev_omega = omega
    prev_constrained_locations_matrix = constrained_locations_matrix

    for i in range(max_iter):
        print("")
        _show('Iteration', i)

        pi = self.mnl_probabilities.get_pi(sdratio_matrix, omega, constrained_locations_matrix,
                                           unconstrained_locations_matrix, nobs)

        data[:, :, -1] = ln(pi)

        result = self.bhhh_estimation.run(data, modified_upc_sequence, resources)

        probability = modified_upc_sequence.get_probabilities()
        prob_hat = ma.filled(probability / pi, 0.0)

        # HARD CODED
        # scale new_demand from 0.2% to 100%
        demand_new = self.mnl_probabilities.get_demand(index, prob_hat, nsupply) * demand_scale

        ## update supply-demand ratio
        sdratio = ma.filled(supply / ma.masked_where(demand_new == 0, demand_new), 2.0)
        sdratio = _round(sdratio, 1.0, CLOSE)
        sdratio_matrix = sdratio[index]

        constrained_locations = where(((average_omega * demand_new - supply) > CLOSE), 1, 0)
        unconstrained_locations = 1 - constrained_locations
        constrained_locations_matrix = constrained_locations[index]
        unconstrained_locations_matrix = unconstrained_locations[index]

        constrained_ex_post = constrained_locations_matrix
        constrained_ex_post_not_ex_ante = where((constrained_ex_post - constrained_ex_ante) == 1, 1, 0)
        constrained_ex_ante_not_ex_post = where((constrained_ex_post - constrained_ex_ante) == -1, 1, 0)

        # Assumption 5: if j belongs to constrained ex post and unconstrained ex ante, then p^i_j <= D_j / S_j
        _show('Number of individual violating Assumption 5: ',
              where((probability > 1 / sdratio_matrix) * constrained_ex_post_not_ex_ante)[0].size)

        # Assumption 6: pi of constrained locations should be less than 1
        _show('Number of individual violating Assumption 6: ',
              where((probability * constrained_ex_post).sum(axis=1) >
                    (prob_hat * constrained_ex_post).sum(axis=1))[0].size)

        _show('number of constrainted locations: ', constrained_locations.sum())
        _show('number of unconstrainted locations: ', unconstrained_locations.sum())
        _show('Min Ex Post Constraints:', min(constrained_ex_post.sum(axis=1)))
        _show('Max Ex Post Constraints:', max(constrained_ex_post.sum(axis=1)))

        _show('At Least 1 Constrained Ex Ante Not Ex Post*:',
              where(constrained_ex_ante_not_ex_post.sum(axis=1))[0].size)
        _show('At Least 1 Constrained Ex Post Not Ex Ante:',
              where(constrained_ex_post_not_ex_ante.sum(axis=1))[0].size)

        omega = self.mnl_probabilities.get_omega(prob_hat, constrained_locations_matrix,
                                                 unconstrained_locations_matrix, sdratio_matrix)
        omega = _round(omega, 1.0, CLOSE)

        _show('Minimum Omega', min(omega))
        _show('Maximum Omega', max(omega))
        _show('Mean Omega:', mean(omega))
        _show('Median Omega:', median(omega))
        _show('Sum Omega:', omega.sum())
        _show('Standard Deviation Omega:', standard_deviation(omega))
        _show('Count of Negative Omega', (where(omega < 0, 1, 0).sum()))
        _show('Count of Omega < 1: ', (where(omega < 1, 1, 0).sum()))
        _show('Count of Omega > 2: ', (where(omega > 2, 1, 0).sum()))
        _show('Count of Omega > 4: ', (where(omega > 4, 1, 0).sum()))

        average_omega = self.mnl_probabilities.get_average_omega(omega, prob_hat, index, nsupply, nobs, demand_new)
        average_omega = _round(average_omega, 1.0, CLOSE)

        excess_demand = (demand_new - supply) * constrained_locations
        global_excess_demand = excess_demand.sum()

        _show('Global Excess Demand', global_excess_demand)

        if ma.allclose(omega, prev_omega, atol=1e-3) or not any(
                constrained_locations_matrix - prev_constrained_locations_matrix):
            print('omega or constrained ex post unchanged: Convergence criterion achieved')
            break

        # Remember this iteration's state for the next convergence check.
        prev_omega = omega
        prev_constrained_locations_matrix = constrained_locations_matrix

    return result
def compute(self, dataset_pool):
    """Return ``ln`` applied to the dataset attribute named by ``self.income``.

    ``dataset_pool`` is not used by this method.
    """
    dataset = self.get_dataset()
    income_values = dataset.get_attribute(self.income)
    return ln(income_values)
def run(self, data, upc_sequence, resources=None):
    """Estimate the model repeatedly until the constraint term ``pi`` settles.

    Every outer iteration runs the BHHH estimation, turns the resulting
    probabilities into demand, asks ``self.inner_loop`` for omega and the
    constrained-locations matrix, and stores ``ln(pi)`` in the last column of
    ``data`` for the next estimation. Diagnostics are appended to the file
    named by the session configuration entry ``"info_file"``.

    Arguments:
        data -- 3-d array unpacked as ``(N, neqs, V)``; one extra column for
            ``ln_pi`` is appended after the first estimation.
        upc_sequence -- provides ``probability_class`` and ``utility_class``.
        resources -- must provide ``"_model_"``, ``"capacity_string"`` and
            ``"coefficient_names"``; ``"max_iterations"`` (default 100),
            ``"index"`` and ``"aggregate_to_dataset"`` are optional.
            NOTE: despite the ``None`` default it is dereferenced
            unconditionally, so callers have to pass it.

    Returns:
        The result of the last estimation with ``result['pi']`` added.

    Raises:
        RuntimeError -- if ``pi`` has not converged after ``max_iterations``
            outer iterations.
    """
    # Function-scope imports kept local as before, but executed once instead
    # of on every loop iteration.
    from numpy import histogram
    from opus_core.misc import unique

    self.mnl_probabilities = upc_sequence.probability_class
    self.bhhh_estimation = bhhh_mnl_estimation()
    modified_upc_sequence = UPCFactory().get_model(
        utilities=None,
        probabilities="opus_core.mnl_probabilities",
        choices=None)
    modified_upc_sequence.utility_class = upc_sequence.utility_class

    N, neqs, V = data.shape
    max_iter = resources.get("max_iterations", 100)  # default
    sc = SessionConfiguration()
    dataset_pool = sc.get_dataset_pool()
    sample_rate = dataset_pool.get_dataset("sample_rate")
    CLOSE = sc["CLOSE"]
    info_filename = os.path.join('.', sc["info_file"])

    constraint_dict = {1: 'constrained', 0: 'unconstrained'}
    # set swing alternatives to constrained (1) or unconstrained (0)
    swing_cases_fix = 0

    choice_set = resources['_model_'].choice_set
    J = choice_set.size()
    # NOTE(review): alt_id keeps the choice-set ids even when the index is
    # re-mapped to an aggregate dataset below -- confirm this is intended.
    alt_id = choice_set.get_id_attribute()
    movers = choice_set.get_attribute('movers')

    resources.check_obligatory_keys(["capacity_string"])
    supply = choice_set.get_attribute(resources["capacity_string"])

    index = resources.get("index", None)
    if index is None:
        # no sampling case, alternative set is the full choice_set
        index = arange(J)
    if index.ndim <= 1:
        index = repeat(index[newaxis, :], N, axis=0)

    if resources.get('aggregate_to_dataset', None):
        aggregate_dataset = dataset_pool.get_dataset(
            resources.get('aggregate_to_dataset'))
        choice_set_aggregate_id = choice_set.get_attribute(
            aggregate_dataset.get_id_name()[0])
        index = aggregate_dataset.get_id_index(
            choice_set_aggregate_id[index].ravel()).reshape(index.shape)
        supply = aggregate_dataset.get_attribute(
            resources["capacity_string"])
        J = aggregate_dataset.size()
        movers = aggregate_dataset.get_attribute("movers")

    # First column holds the movers; one demand column is appended per
    # outer iteration.
    demand_history = movers[:, newaxis]
    resources.merge({"index": index})
    pi = ones(index.shape, dtype=float32)  # initialize pi

    # Only available from the second outer iteration on; the correlation
    # report is skipped while they are still unset.
    probability_old = None
    prob_hat = None

    separator = "================================================================="

    # Opened after the setup so that a failing setup cannot leak the handle.
    info_file = open(info_filename, "a")
    logger.start_block('Outer Loop')
    try:
        for i in range(max_iter):
            logger.log_status('Outer Loop Iteration %s' % i)

            result = self.bhhh_estimation.run(data, modified_upc_sequence,
                                              resources)
            # Replace the estimator with a fresh instance for the next pass.
            del self.bhhh_estimation
            collect()
            self.bhhh_estimation = bhhh_mnl_estimation()

            probability = modified_upc_sequence.get_probabilities()
            if data.shape[2] == V:
                # insert a placeholder for ln(pi) in data
                data = concatenate(
                    (data, ones((N, neqs, 1), dtype=float32)), axis=2)
                coef_names = resources.get("coefficient_names")
                coef_names = concatenate((coef_names, array(["ln_pi"])))
                resources.merge({"coefficient_names": coef_names})
            else:
                beta_ln_pi = result['estimators'][where(
                    coef_names == 'ln_pi')][0]
                logger.log_status("mu = 1/%s = %s" %
                                  (beta_ln_pi, 1 / beta_ln_pi))

                prob_hat = safe_array_divide(probability, pi**beta_ln_pi)
                prob_hat_sum = prob_hat.sum(axis=1, dtype=float32)
                if not ma.allclose(prob_hat_sum, 1.0):
                    logger.log_status(
                        "probability doesn't sum up to 1, with minimum %s, and maximum %s"
                        % (prob_hat_sum.min(), prob_hat_sum.max()))
                # Demand below is always derived from prob_hat.
                # NOTE(review): normalize() presumably rescales each row to
                # sum to one, which leaves rows that already do unchanged --
                # confirm against its definition.
                probability = normalize(prob_hat)

            demand = self.mnl_probabilities.get_demand(
                index, probability, J) * 1 / sample_rate
            demand_history = concatenate(
                (demand_history, demand[:, newaxis]), axis=1)

            sdratio = safe_array_divide(
                supply, demand, return_value_if_denominator_is_zero=2.0)
            sdratio_matrix = sdratio[index]

            # debug info
            flat_index = index.ravel()
            cc = histogram(flat_index, unique(flat_index))[0]
            logger.log_status(separator)
            logger.log_status("Probability min: %s, max: %s" %
                              (probability.min(), probability.max()))
            logger.log_status("Demand min: %s, max: %s" %
                              (demand.min(), demand.max()))
            logger.log_status("sdratio min: %s, max: %s" %
                              (sdratio.min(), sdratio.max()))
            logger.log_status("demand[sdratio==sdratio.min()]=%s" %
                              demand[sdratio == sdratio.min()])
            logger.log_status("demand[sdratio==sdratio.max()]=%s" %
                              demand[sdratio == sdratio.max()])
            logger.log_status(
                "Counts of unique submarkets in alternatives min: %s, max: %s"
                % (cc.min(), cc.max()))
            logger.log_status(separator)

            constrained_locations_matrix, omega, info = self.inner_loop(
                supply,
                demand,
                probability,
                index,
                sdratio_matrix,
                J,
                max_iteration=max_iter)
            (inner_iterations, constrained_locations_history, swing_index,
             average_omega_history) = info
            n_inner = len(inner_iterations)

            # Alternatives that kept flipping in the inner loop are pinned to
            # the configured state.
            for idx in swing_index:
                logger.log_status(
                    "swinging alt with id %s set to %s" %
                    (alt_id[idx], constraint_dict[swing_cases_fix]))
                constrained_locations_matrix[index == idx] = swing_cases_fix

            if swing_index.size > 0:
                info_file.write("swing of constraints found with id %s \n" %
                                alt_id[swing_index])
                info_file.write("outer_iteration, %i, " % i +
                                ", ".join([str(i)] * n_inner) + "\n")
                info_file.write("inner_iteration, , " +
                                ", ".join(inner_iterations) + "\n")
                info_file.write("id, sdratio, " +
                                ", ".join(["avg_omega"] * n_inner) + "\n")
                for idx in swing_index:
                    line = str(alt_id[idx]) + ','
                    line += str(sdratio[idx]) + ','
                    line += ",".join(
                        [str(x) for x in average_omega_history[idx, ]])
                    line += "\n"
                    info_file.write(line)
                info_file.write("\n")
                info_file.flush()

            outer_iterations = [str(i)] * n_inner
            prob_min = [str(probability.min())] * n_inner
            prob_max = [str(probability.max())] * n_inner

            pi_new = self.mnl_probabilities.get_pi(
                sdratio_matrix, omega, constrained_locations_matrix)
            data[:, :, -1] = ln(pi_new)

            if not ma.allclose(pi, pi_new, atol=CLOSE):
                # diagnostic output
                if i > 0:  # don't print this for the first iteration
                    pi_change = pi_new - pi
                    logger.log_status("min of abs(pi(l+1) - pi(l)): %s" %
                                      absolute(pi_change).min())
                    logger.log_status("max of abs(pi(l+1) - pi(l)): %s" %
                                      absolute(pi_change).max())
                    logger.log_status("mean of pi(l+1) - pi(l): %s" %
                                      pi_change.mean())
                    logger.log_status(
                        'Standard Deviation pi(l+1) - pi(l): %s' %
                        standard_deviation(pi_change))
                    logger.log_status(
                        'correlation of pi(l+1) and pi(l): %s' %
                        corr(pi_new.ravel(), pi.ravel())[0, 1])
                pi = pi_new
                # keep probability of the previous loop, for statistics
                # computation only
                probability_old = probability
                continue

            # convergence criterion achieved, quitting outer loop
            logger.log_status(
                "pi(l) == pi(l+1): Convergence criterion achieved")

            info_file.write("\nConstrained Locations History:\n")
            info_file.write("outer_iteration," +
                            ",".join(outer_iterations) + "\n")
            info_file.write("inner_iteration," +
                            ",".join(inner_iterations) + "\n")
            info_file.write("minimum_probability," + ",".join(prob_min) +
                            "\n")
            info_file.write("maximum_probability," + ",".join(prob_max) +
                            "\n")
            for row in range(J):
                line = [str(x) for x in constrained_locations_history[row, ]]
                info_file.write(
                    str(alt_id[row]) + "," + ",".join(line) + "\n")
            info_file.flush()

            # One label per demand column; the first history column is the
            # movers. (range(i) used to leave the last column unlabelled.)
            iteration_labels = [
                str(x) for x in range(demand_history.shape[1] - 1)
            ]
            demand_header = ("outer_iteration, (movers)," +
                             ",".join(iteration_labels) + "\n")

            info_file.write("\nDemand History:\n")
            info_file.write(demand_header)
            for row in range(J):
                line = [str(x) for x in demand_history[row, ]]
                info_file.write(
                    str(alt_id[row]) + "," + ",".join(line) + "\n")

            for criterion in [500, 100, 50, 20]:
                com_rows_index = where(movers <= criterion)[0]
                info_file.write(
                    "\nDemand History for alternatives with less than or equal to %s movers in 1998:\n"
                    % criterion)
                info_file.write(demand_header)
                for row in com_rows_index:
                    line = [str(x) for x in demand_history[row, ]]
                    info_file.write(
                        str(alt_id[row]) + "," + ",".join(line) + "\n")

            # export prob correlation history; needs the probabilities of a
            # previous iteration, so it is skipped when the very first
            # iteration already converges (this used to fail on unbound
            # local names).
            if probability_old is not None:
                correlation_indices, prob_correlation = \
                    self.compute_prob_correlation(
                        probability_old, probability, prob_hat, index,
                        resources)
                info_file.write("\nCorrelation of Probabilities:\n")
                c_name = [
                    'corr(p_ij p~_ij)', 'corr(p_ij p^_ij)',
                    'corr(p_ij dummy)', 'corr(p~_ij p^_ij)',
                    'corr(p~_ij dummy)', 'corr(p^_ij dummy)'
                ]
                info_file.write("com_id, " + ",".join(c_name) + "\n")
                for row in range(correlation_indices.size):
                    line = [str(x) for x in prob_correlation[row, ]]
                    info_file.write(
                        str(alt_id[correlation_indices[row]]) + "," +
                        ",".join(line) + "\n")

            result['pi'] = pi
            return result

        raise RuntimeError("max iteration reached without convergence.")
    finally:
        # Runs on convergence, on exhaustion and on any error. Closing stays
        # best effort so that it cannot mask the actual outcome.
        try:
            info_file.close()
        except (IOError, OSError):
            logger.log_status("could not close info file %s" % info_filename)
        logger.end_block()
def compute(self, dataset_pool):
    """Return ``ln(x + 1) / 10`` for the attribute named by ``self.comm_den``.

    ``dataset_pool`` is not used by this method.
    """
    density = self.get_dataset().get_attribute(self.comm_den)
    shifted = density + 1
    return ln(shifted) / 10
def compute(self, dataset_pool):
    """Return ``ln`` applied to the attribute named by ``self.dep_variable``.

    ``dataset_pool`` is not used by this method.
    """
    dataset = self.get_dataset()
    dependent_values = dataset.get_attribute(self.dep_variable)
    return ln(dependent_values)
def compute(self, dataset_pool):
    """Return ``ln(x + 1) / 10`` for the attribute named by ``self.house_den``.

    ``dataset_pool`` is not used by this method.
    """
    density = self.get_dataset().get_attribute(self.house_den)
    shifted = density + 1
    return ln(shifted) / 10