Example 1
 def compute(self, dataset_pool):
     inc = reshape(self.get_dataset().get_attribute_of_dataset(self.hh_income),
                          (self.get_dataset().get_reduced_n(), 1))
     cost = self.get_dataset().get_2d_dataset_attribute(self.housing_cost)
     I = inc-cost
     affordability = where(I > 0, ln(I+1),I)
     return affordability
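A minimal standalone sketch of the same transform (not part of the original variable code), assuming plain NumPy with np.log standing in for the framework's ln; the income/cost numbers are illustrative only:

import numpy as np

# Illustrative inputs: one income per household (column) against one housing cost per location
inc = np.array([[50000.0], [20000.0]])
cost = np.array([[30000.0, 60000.0],
                 [15000.0, 25000.0]])
I = inc - cost                                       # surplus (positive) or shortfall (negative)
with np.errstate(invalid='ignore'):                  # np.log warns on negative entries that where() discards
    affordability = np.where(I > 0, np.log(I + 1), I)  # log-damp surpluses, keep shortfalls as-is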
Example 2
    def test_my_inputs(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='land_covers',
                            table_data={
                                'relative_x': array([1, 2, 1, 2]),
                                'relative_y': array([1, 1, 2, 2]),
                                "lct": array([10, 10, 4, 3])
                            })

        dataset_pool = DatasetPool(package_order=['biocomplexity'],
                                   storage=storage)
        footprint = array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
        dataset_pool._add_dataset('constant', {
            "FOOTPRINT": footprint,
            'AG': 10,
        })

        gridcell = dataset_pool.get_dataset('land_cover')
        gridcell.compute_variables(self.variable_name,
                                   dataset_pool=dataset_pool)
        values = gridcell.get_attribute(self.variable_name)

        should_be = array([[1, 1], [0, 0]])
        should_be = correlate(ma.filled(should_be.astype(int32), 0),
                              footprint,
                              mode="reflect")
        should_be = less_equal((should_be / 5.0), 400)
        should_be = ln(distance_transform_edt(should_be) +
                       1) / dag.standardization_constant_distance
        should_be = ravel(transpose(should_be))  # flatten by id

        self.assert_(ma.allclose(values, should_be, rtol=1e-7),
                     msg="Error in " + self.variable_name)
Example 3
    def test_my_inputs(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(
            table_name='land_covers',
            table_data={
                'relative_x': array([1,2,1,2]),
                'relative_y': array([1,1,2,2]),
                "lct": array([11,2,4,3])
            }
        )

        dataset_pool = DatasetPool(package_order=['biocomplexity'],
                                   storage=storage)
        dataset_pool._add_dataset(
            'constant',
            {
                "CELLSIZE":30,
                "ALL_URBAN":['HU', 'MU', 'LU'],
                'HU': 1,
                'MU': 2,
                'LU': 3
            }
        )

        gridcell = dataset_pool.get_dataset('land_cover')
        gridcell.compute_variables(self.variable_name,
                                   dataset_pool=dataset_pool)
        values = gridcell.get_attribute(self.variable_name)

        should_be = array([1, 0, 1, 0], dtype=float32)
        should_be = ln(30*distance_transform_edt(should_be)+1) / ddt1.standardization_constant_distance

        self.assert_(ma.allclose( values, should_be, rtol=1e-7),
                     msg = "Error in " + self.variable_name)
Example 4
    def test_my_inputs(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(
            table_name='land_covers',
            table_data={
                'relative_x': array([1,2,1,2]),
                'relative_y': array([1,1,2,2]),
                "comm_add4": array([1, 2, 5, 15])
            }
        )

        dataset_pool = DatasetPool(package_order=['biocomplexity'],
                                   storage=storage)
        dataset_pool._add_dataset(
            'constant',
            {
                "CELLSIZE": 250    # this results in a 3x3 grid, (750/250)x(750/250)
            }
        )

        gridcell = dataset_pool.get_dataset('land_cover')
        gridcell.compute_variables(self.variable_name,
                                   dataset_pool=dataset_pool)
        values = gridcell.get_attribute(self.variable_name)

        should_be = array([1*4+2*2+5*2+15, 1*2+2*4+5+15*2, 1*2+2+5*4+15*2, 1+2*2+5*2+15*4])
        should_be = ln(should_be + 1) / 10.0

        self.assert_(ma.allclose( values, should_be, rtol=1e-7),
                     msg = "Error in " + self.variable_name)
Example 5
    def test_my_inputs(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='land_covers',
                            table_data={
                                'relative_x': array([1, 2, 1, 2]),
                                'relative_y': array([1, 1, 2, 2]),
                                "lct": array([11, 2, 4, 3])
                            })

        dataset_pool = DatasetPool(package_order=['biocomplexity'],
                                   storage=storage)
        dataset_pool._add_dataset(
            'constant', {
                "CELLSIZE": 30,
                "ALL_URBAN": ['HU', 'MU', 'LU'],
                'HU': 1,
                'MU': 2,
                'LU': 3
            })

        gridcell = dataset_pool.get_dataset('land_cover')
        gridcell.compute_variables(self.variable_name,
                                   dataset_pool=dataset_pool)
        values = gridcell.get_attribute(self.variable_name)

        should_be = array([1, 0, 1, 0], dtype=float32)
        should_be = ln(30 * distance_transform_edt(should_be) +
                       1) / ddt1.standardization_constant_distance

        self.assert_(ma.allclose(values, should_be, rtol=1e-7),
                     msg="Error in " + self.variable_name)
Example 6
 def compute(self, dataset_pool):
     cellsize = dataset_pool.get_dataset('constants')['CELLSIZE']
     fpdimension = int(self.footprint_width / cellsize)
     fp = ones((fpdimension, fpdimension), dtype="int32")
     summed = correlate( ma.filled( self.get_dataset().get_2d_attribute( self.comm_add ), 0.0 ), \
                         fp, mode="reflect" )
     return ln(self.get_dataset().flatten_by_id(summed)+1)/10.0
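A minimal sketch (illustrative, not the original class) of the moving-window sum that correlate performs with an all-ones footprint, assuming scipy.ndimage; the 2x2 grid mirrors the comm_add4 test data used elsewhere in this listing:

import numpy as np
from scipy.ndimage import correlate

grid = np.array([[1, 2],
                 [5, 15]])                        # commercial additions on a 2x2 grid (illustrative)
fp = np.ones((3, 3), dtype="int32")               # 750 m window / 250 m cells -> 3x3 footprint
summed = correlate(grid, fp, mode="reflect")      # window sums with reflected borders
result = np.log(summed + 1) / 10.0                # same final transform as compute() above
# summed.ravel() -> [33, 45, 54, 75], the same window sums hand-computed in the tests above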
Example 7
    def test_my_inputs_for_hmps(self):
        variable_name = "biocomplexity.land_cover.hmps"
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='land_covers',
                            table_data={
                                'relative_x': array([1, 2, 1, 2]),
                                'relative_y': array([1, 1, 2, 2]),
                                "lct": array([1, 2, 1, 4]),
                            })

        dataset_pool = DatasetPool(package_order=['biocomplexity'],
                                   storage=storage)
        dataset_pool._add_dataset(
            'constant', {
                "FOOTPRINT": array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]),
                'HU': 1,
            })

        land_cover = dataset_pool.get_dataset('land_cover')
        land_cover.compute_variables(variable_name, dataset_pool=dataset_pool)
        values = land_cover.get_attribute(variable_name)

        should_be = array([2, 2, 2, 2], dtype=float32)
        should_be = ln(should_be + 1) / SSSmps.standardization_constant_MPS

        self.assert_(ma.allclose(values, should_be, rtol=1e-7),
                     msg="Error in " + variable_name)
Example 8
 def _run_stochastic_test_poisson(
     self, function, expected_results, number_of_iterations, significance_level=0.01, transformation=None
 ):
     """
     Run the given function for the specified number_of_iterations.
     Uses Bayesian statistics to determine whether the produced results are
     within the specified significance_level of the expected_results.
     """
     K = expected_results.size
     sum_y = zeros(K, dtype=float32)
     x_kr = zeros((number_of_iterations, K), dtype=float32)
     for i in range(number_of_iterations):
         y_r = function()
         x_kr[i, :] = try_transformation(y_r, transformation)
         sum_y = sum_y + x_kr[i, :]
     lambdak = sum_y / float(number_of_iterations)
     lambdanull = try_transformation(expected_results.astype(float32), transformation)
     #        print lambdak
     #        print lambdanull
     sumxk = sum(x_kr, axis=0)
     LRTS = 2.0 * (
         (number_of_iterations * (lambdanull - lambdak).sum())
         + (ln(lambdak / ma.masked_where(lambdanull == 0, lambdanull)) * sumxk).sum()
     )
     prob = chisqprob(LRTS, K)
     # print LRTS, prob
     logger.log_status("Stochastic Test Poisson: LRTS=" + str(LRTS) + ", df=", str(K), ", p=" + str(prob))
     return (prob >= significance_level, "prob=%f < significance level of %f" % (prob, significance_level))
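Read directly off the code above, LRTS is the Poisson likelihood-ratio statistic 2*[R*sum_k(lambda0_k - lambdahat_k) + sum_k ln(lambdahat_k/lambda0_k)*sum_r x_kr], compared against a chi-square distribution with K degrees of freedom. A small self-contained check of that algebra (assuming SciPy; chi2.sf plays the role of chisqprob, and the rates are illustrative):

import numpy as np
from scipy.stats import chi2

R, K = 200, 3                                     # iterations and number of cells (illustrative)
lam0 = np.array([5.0, 10.0, 2.0])                 # expected rates (lambdanull)
x = np.random.poisson(lam0, size=(R, K)).astype(np.float32)
lam_hat = x.mean(axis=0)                          # lambdak, the observed mean rate per cell
LRTS = 2.0 * (R * (lam0 - lam_hat).sum()
              + (np.log(lam_hat / lam0) * x.sum(axis=0)).sum())
p_value = chi2.sf(LRTS, df=K)                     # survival function, i.e. chisqprob(LRTS, K)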
Example 9
    def test_my_inputs(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='land_covers',
                            table_data={
                                'relative_x': array([1, 2, 1, 2]),
                                'relative_y': array([1, 1, 2, 2]),
                                "comm_add4": array([1, 2, 5, 15])
                            })

        dataset_pool = DatasetPool(package_order=['biocomplexity'],
                                   storage=storage)
        dataset_pool._add_dataset(
            'constant',
            {
                "CELLSIZE":
                250  # this results in a 3x3 grid, (750/250)x(750/250)
            })

        gridcell = dataset_pool.get_dataset('land_cover')
        gridcell.compute_variables(self.variable_name,
                                   dataset_pool=dataset_pool)
        values = gridcell.get_attribute(self.variable_name)

        should_be = array([
            1 * 4 + 2 * 2 + 5 * 2 + 15, 1 * 2 + 2 * 4 + 5 + 15 * 2,
            1 * 2 + 2 + 5 * 4 + 15 * 2, 1 + 2 * 2 + 5 * 2 + 15 * 4
        ])
        should_be = ln(should_be + 1) / 10.0

        self.assert_(ma.allclose(values, should_be, rtol=1e-7),
                     msg="Error in " + self.variable_name)
Example 10
 def compute(self, dataset_pool):
     cellsize = dataset_pool.get_dataset('constants')['CELLSIZE']
     fpdimension = int(self.footprint_width / cellsize)
     fp = ones((fpdimension, fpdimension), dtype="int32")
     summed = correlate( ma.filled( self.get_dataset().get_2d_attribute( self.comm_add ), 0.0 ), \
                         fp, mode="reflect" )
     return ln(self.get_dataset().flatten_by_id(summed) + 1) / 10.0
Example 11
    def test_my_inputs_for_hmps(self):
        variable_name = "biocomplexity.land_cover.hmps"
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(
            table_name='land_covers',
            table_data={
                'relative_x': array([1,2,1,2]),
                'relative_y': array([1,1,2,2]),
                "lct": array([1, 2, 1, 4]),
            }
        )

        dataset_pool = DatasetPool(package_order=['biocomplexity'],
                                   storage=storage)
        dataset_pool._add_dataset(
            'constant',
            {
                "FOOTPRINT": array([[0,1,0], [1,1,1], [0,1,0]]),
                'HU': 1,
            }
        )

        land_cover = dataset_pool.get_dataset('land_cover')
        land_cover.compute_variables(variable_name,
                                     dataset_pool=dataset_pool)
        values = land_cover.get_attribute(variable_name)

        should_be = array([2, 2, 2, 2], dtype=float32)
        should_be = ln(should_be + 1) / SSSmps.standardization_constant_MPS

        self.assert_(ma.allclose( values, should_be, rtol=1e-7),
                     msg = "Error in " + variable_name)
Example 12
 def compute(self, dataset_pool):
     inc = reshape(
         self.get_dataset().get_attribute_of_dataset(self.hh_income),
         (self.get_dataset().get_reduced_n(), 1))
     cost = self.get_dataset().get_2d_dataset_attribute(self.housing_cost)
     I = inc - cost
     affordability = where(I > 0, ln(I + 1), I)
     return affordability
Example 13
 def test_my_inputs(self):        
     values = VariableTestToolbox().compute_variable(self.variable_name, 
         {"building":{
              "land_value": array([10, 5, 0])}, 
          }, 
         dataset = "building")
     should_be = ln(array([10, 5, 0]))
     
     self.assertEqual(ma.allclose(values, should_be, rtol=1e-2), True, msg = "Error in " + self.variable_name)
Example 14
 def compute(self, dataset_pool):
     constants = dataset_pool.get_dataset('constants')
     ag = constants["AG"]
     lct = ma.filled(self.get_dataset().get_2d_attribute(self.land_cover_type), 0)
     footprint = constants['FOOTPRINT']
     is_lct_ag = lct==ag
     summed = self._compute_patch_size_of_cover_types(is_lct_ag, footprint)
     summed = less_equal(summed, 400).astype(bool8)
     distances = distance_transform_edt(summed)
     return self.get_dataset().flatten_by_id(ln(distances + 1)
                                / self.standardization_constant_distance )
Example 15
 def compute(self, dataset_pool):
     constants = dataset_pool.get_dataset('constants')
     cellsize = constants["CELLSIZE"]
     all_urban_types = constants["ALL_URBAN"]
     all_urban_types = map(lambda key: constants[key], all_urban_types)
     lct = ma.filled(self.get_dataset().get_2d_attribute(self.land_cover_type), 0)
     is_lct_all_urban = zeros(shape=lct.shape, dtype="int32")
     for urban_type in all_urban_types:
         is_lct_all_urban += equal(lct, urban_type)
     temp = logical_not(is_lct_all_urban)
     dd = ln(cellsize*(distance_transform_edt(temp))+1) / self.standardization_constant_distance
     return self.get_dataset().flatten_by_id(dd).astype(float32)
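A minimal sketch of the distance step, assuming scipy.ndimage (not the original class): distance_transform_edt measures, in cells, the distance from each nonzero (non-urban) element to the nearest zero (urban) element, which the code above then scales by CELLSIZE to metres before taking the log:

import numpy as np
from scipy.ndimage import distance_transform_edt

not_urban = np.array([[1, 1, 1, 1],
                      [1, 1, 0, 1],               # the single 0 marks an urban cell (illustrative)
                      [1, 1, 1, 1],
                      [1, 1, 1, 1]])
cellsize = 30                                     # metres per cell, as in the tests above
dd = np.log(cellsize * distance_transform_edt(not_urban) + 1)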
Example 16
 def compute(self, dataset_pool):
     constants = dataset_pool.get_dataset('constants')
     ag = constants["AG"]
     lct = ma.filled(
         self.get_dataset().get_2d_attribute(self.land_cover_type), 0)
     footprint = constants['FOOTPRINT']
     is_lct_ag = lct == ag
     summed = self._compute_patch_size_of_cover_types(is_lct_ag, footprint)
     summed = less_equal(summed, 400).astype(bool8)
     distances = distance_transform_edt(summed)
     return self.get_dataset().flatten_by_id(
         ln(distances + 1) / self.standardization_constant_distance)
Example 17
    def compute(self, dataset_pool):
        constants = dataset_pool.get_dataset('constants')
        covertypes_of_interest = self._cover_type_translation(constants)
        lct = ma.filled(self.get_dataset().get_2d_attribute(self.land_cover_type), 0)
        footprint = constants['FOOTPRINT']
        is_lct_of_interest = reduce(lambda prev_answer, lct_num: logical_or(prev_answer, lct==lct_num),
                                    covertypes_of_interest,
                                    zeros(shape=lct.shape, dtype=int32))

        summed =  self._compute_patch_size_of_cover_types(is_lct_of_interest, footprint)
        return self.get_dataset().flatten_by_id(ln(summed + 1)
                                    / self.standardization_constant_MPS )
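The reduce/logical_or idiom above simply ORs together one boolean mask per cover type of interest; a minimal standalone sketch with illustrative codes:

import numpy as np
from functools import reduce   # builtin in Python 2; imported so the sketch also runs on Python 3

lct = np.array([[1, 2],
                [1, 4]])                          # cover-type codes per cell (illustrative)
covertypes_of_interest = [1, 4]
mask = reduce(lambda prev, code: np.logical_or(prev, lct == code),
              covertypes_of_interest,
              np.zeros(lct.shape, dtype=np.int32))
# mask is truthy exactly where lct holds one of the codes of interest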
Example 18
    def test_my_inputs(self):
        storage = StorageFactory().get_storage('dict_storage')        
        
        storage.write_table(
            table_name='gridcells',
            table_data={
                'grid_id': array([1,2,3]),
                'housing_cost': array([42, 84, 2]),
            }
        )
        storage.write_table(
            table_name='households',
            table_data={
                'household_id': array([1, 2, 3]),
                'income': array([42, 77, 99]),
            }
        )
        
        dataset_pool = DatasetPool(package_order=['urbansim'],
                                   storage=storage)

        household_x_gridcell = dataset_pool.get_dataset('household_x_gridcell')
        household_x_gridcell.compute_variables(self.variable_name, 
                                               dataset_pool=dataset_pool)
        values = household_x_gridcell.get_attribute(self.variable_name)
        
        should_be = array([[ 0, -42, ln(41)],
                           [ ln(36), -7, ln(76)],
                           [ ln(58), ln(16), ln(98)]])
        
        self.assert_(ma.allclose(values, should_be, rtol=1e-7), 
                     msg="Error in " + self.variable_name)
Example 19
    def test_my_inputs(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='gridcells',
                            table_data={
                                'grid_id': array([1, 2, 3]),
                                'housing_cost': array([42, 84, 2]),
                            })
        storage.write_table(table_name='households',
                            table_data={
                                'household_id': array([1, 2, 3]),
                                'income': array([42, 77, 99]),
                            })

        dataset_pool = DatasetPool(package_order=['urbansim'], storage=storage)

        household_x_gridcell = dataset_pool.get_dataset('household_x_gridcell')
        household_x_gridcell.compute_variables(self.variable_name,
                                               dataset_pool=dataset_pool)
        values = household_x_gridcell.get_attribute(self.variable_name)

        should_be = array([[0, -42, ln(41)], [ln(36), -7, ln(76)],
                           [ln(58), ln(16), ln(98)]])

        self.assert_(ma.allclose(values, should_be, rtol=1e-7),
                     msg="Error in " + self.variable_name)
Example 20
 def test_my_inputs(self):
     avg_value = array([21,22,27,42]) 
     some_gridcell_zone_ids = array([1,2,1,3]) 
     grid_id = array([1,2,3,4])
     
     values = VariableTestToolbox().compute_variable(self.variable_name, 
         {"zone":{
             "zone_id":array([1,2, 3]),
              "avg_val_per_unit_commercial": array([10, 5, 0])}, 
          }, 
         dataset = "zone")
     should_be = ln(array([10, 5, 0]))
     
     self.assertEqual(ma.allclose(values, should_be, rtol=1e-2), True, msg = "Error in " + self.variable_name)
Example 21
 def test_my_inputs(self):
     avg_value = array([21,22,27,42]) 
     some_gridcell_zone_ids = array([1,2,1,3]) 
     grid_id = array([1,2,3,4])
     
     values = VariableTestToolbox().compute_variable(self.variable_name, 
         {"zone":{
             "zone_id":array([1,2, 3]),
              "avg_val_per_unit_commercial": array([10, 5, 0])}, 
          }, 
         dataset = "zone")
     should_be = ln(array([10, 5, 0]))
     
     self.assertEqual(ma.allclose(values, should_be, rtol=1e-2), True, msg = "Error in " + self.variable_name)
Example 22
 def compute(self, dataset_pool):
     constants = dataset_pool.get_dataset('constants')
     cellsize = constants["CELLSIZE"]
     all_urban_types = constants["ALL_URBAN"]
     all_urban_types = map(lambda key: constants[key], all_urban_types)
     lct = ma.filled(
         self.get_dataset().get_2d_attribute(self.land_cover_type), 0)
     is_lct_all_urban = zeros(shape=lct.shape, dtype="int32")
     for urban_type in all_urban_types:
         is_lct_all_urban += equal(lct, urban_type)
     temp = logical_not(is_lct_all_urban)
     dd = ln(cellsize * (distance_transform_edt(temp)) +
             1) / self.standardization_constant_distance
     return self.get_dataset().flatten_by_id(dd).astype(float32)
Example 23
    def compute(self, dataset_pool):
        constants = dataset_pool.get_dataset('constants')
        covertypes_of_interest = self._cover_type_translation(constants)
        lct = ma.filled(
            self.get_dataset().get_2d_attribute(self.land_cover_type), 0)
        footprint = constants['FOOTPRINT']
        is_lct_of_interest = reduce(
            lambda prev_answer, lct_num: logical_or(prev_answer, lct == lct_num
                                                    ), covertypes_of_interest,
            zeros(shape=lct.shape, dtype=int32))

        summed = self._compute_patch_size_of_cover_types(
            is_lct_of_interest, footprint)
        return self.get_dataset().flatten_by_id(
            ln(summed + 1) / self.standardization_constant_MPS)
Example 24
 def _run_stochastic_test_poisson(self,
                                  function,
                                  expected_results,
                                  number_of_iterations,
                                  significance_level=0.01,
                                  transformation=None):
     """
     Run the given function for the specified number_of_iterations.
     Uses Bayesian statistics to determine whether the produced results are
     within the specified significance_level of the expected_results.
     """
     K = expected_results.size
     sum_y = zeros(K, dtype=float32)
     x_kr = zeros((number_of_iterations, K), dtype=float32)
     for i in range(number_of_iterations):
         y_r = function()
         x_kr[i, :] = try_transformation(y_r, transformation)
         sum_y = sum_y + x_kr[i, :]
     lambdak = sum_y / float(number_of_iterations)
     lambdanull = try_transformation(expected_results.astype(float32),
                                     transformation)
     #        print lambdak
     #        print lambdanull
     sumxk = sum(x_kr, axis=0)
     LRTS = 2.0 * (
         (number_of_iterations * (lambdanull - lambdak).sum()) +
         (ln(lambdak / ma.masked_where(lambdanull == 0, lambdanull)) *
          sumxk).sum())
     prob = chisqprob(LRTS, K)
     #print LRTS, prob
     logger.log_status(
         "Stochastic Test Poisson: LRTS=" + str(LRTS) + ", df=", str(K),
         ", p=" + str(prob))
     return (prob >= significance_level,
             "prob=%f < significance level of %f" %
             (prob, significance_level))
Example 25
    def test_my_inputs(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(
            table_name='land_covers',
            table_data={
                'relative_x': array([1,2,1,2]),
                'relative_y': array([1,1,2,2]),
                "lct": array([10,10,4,3])
            }
        )

        dataset_pool = DatasetPool(package_order=['biocomplexity'],
                                   storage=storage)
        footprint = array([[0,1,0], [1,1,1], [0,1,0]])
        dataset_pool._add_dataset(
            'constant',
            {
                "FOOTPRINT": footprint,
                'AG': 10,
            }
        )

        gridcell = dataset_pool.get_dataset('land_cover')
        gridcell.compute_variables(self.variable_name,
                                   dataset_pool=dataset_pool)
        values = gridcell.get_attribute(self.variable_name)

        should_be = array([[1,1],[0,0]])
        should_be = correlate(ma.filled(should_be.astype(int32), 0), footprint, mode="reflect")
        should_be = less_equal((should_be/5.0), 400)
        should_be = ln(distance_transform_edt(should_be)+1) / dag.standardization_constant_distance
        should_be = ravel(transpose(should_be)) # flatten by id

        self.assert_(ma.allclose( values, should_be, rtol=1e-7),
                     msg = "Error in " + self.variable_name)
Example 26
    def run(self, data, upc_sequence, resources=None):

        self.mnl_probabilities=upc_sequence.probability_class
        self.bhhh_estimation = bhhh_mnl_estimation()

        modified_upc_sequence = UPCFactory().get_model(
            utilities=None, probabilities="opus_core.mnl_probabilities", choices=None)
        modified_upc_sequence.utility_class = upc_sequence.utility_class

        N, neqs, V = data.shape

        max_iter = resources.get("max_iterations", 100)  # default
        sc = SessionConfiguration()
        dataset_pool = sc.get_dataset_pool()
        sample_rate = dataset_pool.get_dataset("sample_rate")
        
        CLOSE = sc["CLOSE"]
        info_filename = sc["info_file"]
        info_filename = os.path.join('.', info_filename)
        info_file = open(info_filename, "a")
        constraint_dict = {1:'constrained', 0:'unconstrained'}
        swing_cases_fix = 0  #set swing alternatives to constrained (1) or unconstrained (0)
        prob_correlation = None
        
        choice_set = resources['_model_'].choice_set
        J = choice_set.size()
        alt_id = choice_set.get_id_attribute()
        movers = choice_set.get_attribute('movers')

        resources.check_obligatory_keys(["capacity_string"])
        supply = choice_set.get_attribute(resources["capacity_string"])

        index = resources.get("index", None)
        if index is None: # no sampling case, alternative set is the full choice_set
            index = arange(J)
        if index.ndim <= 1:
            index = repeat(index[newaxis,:], N, axis=0)

        if resources.get('aggregate_to_dataset', None):
            aggregate_dataset = dataset_pool.get_dataset(resources.get('aggregate_to_dataset'))
            choice_set_aggregate_id = choice_set.get_attribute(aggregate_dataset.get_id_name()[0])
            index = aggregate_dataset.get_id_index(choice_set_aggregate_id[index].ravel()).reshape(index.shape)

            supply = aggregate_dataset.get_attribute(resources["capacity_string"])
            J = aggregate_dataset.size()

            movers = aggregate_dataset.get_attribute("movers")

        demand_history = movers[:, newaxis]
        resources.merge({"index":index})
        
        pi = ones(index.shape, dtype=float32)  #initialize pi
        #average_omega = ones(J,dtype=float32)  #initialize average_omega
        logger.start_block('Outer Loop')
        for i in range(max_iter):
            logger.log_status('Outer Loop Iteration %s' % i)

            result = self.bhhh_estimation.run(data, modified_upc_sequence, resources)
            del self.bhhh_estimation; collect()
            self.bhhh_estimation = bhhh_mnl_estimation()

            probability = modified_upc_sequence.get_probabilities()
            if data.shape[2] == V:  #insert a placeholder for ln(pi) in data
                data = concatenate((data,ones((N,neqs,1),dtype=float32)), axis=2)
                coef_names = resources.get("coefficient_names")
                coef_names = concatenate( (coef_names, array(["ln_pi"])) )
                resources.merge({"coefficient_names":coef_names})
            else:
                beta_ln_pi = result['estimators'][where(coef_names == 'ln_pi')][0]
                logger.log_status("mu = 1/%s = %s" % (beta_ln_pi, 1/beta_ln_pi))
                
                prob_hat = safe_array_divide(probability, pi ** beta_ln_pi)
                #prob_hat = safe_array_divide(probability, pi)
                prob_hat_sum = prob_hat.sum(axis=1, dtype=float32)
                if not ma.allclose(prob_hat_sum, 1.0):
                    logger.log_status("probability doesn't sum up to 1, with minimum %s, and maximum %s" %
                                      (prob_hat_sum.min(), prob_hat_sum.max()))
                    
                    probability = normalize(prob_hat)

            demand = self.mnl_probabilities.get_demand(index, probability, J) * 1 / sample_rate
            demand_history = concatenate((demand_history,
                                          demand[:, newaxis]),
                                          axis=1)

            sdratio = safe_array_divide(supply, demand, return_value_if_denominator_is_zero=2.0)
            sdratio_matrix = sdratio[index]
            ## debug info
            from numpy import histogram 
            from opus_core.misc import unique
            cc = histogram(index.ravel(), unique(index.ravel()))[0]
            logger.log_status( "=================================================================")
            logger.log_status( "Probability min: %s, max: %s" % (probability.min(), probability.max()) )
            logger.log_status( "Demand min: %s, max: %s" % (demand.min(), demand.max()) )
            logger.log_status( "sdratio min: %s, max: %s" % (sdratio.min(), sdratio.max()) )
            logger.log_status( "demand[sdratio==sdratio.min()]=%s" % demand[sdratio==sdratio.min()] )
            logger.log_status( "demand[sdratio==sdratio.max()]=%s" % demand[sdratio==sdratio.max()] )
            logger.log_status( "Counts of unique submarkets in alternatives min: %s, max: %s" % (cc.min(), cc.max()) )
            logger.log_status( "=================================================================")

            constrained_locations_matrix, omega, info = self.inner_loop(supply, demand, probability,
                                                                        index, sdratio_matrix,
                                                                        J, max_iteration=max_iter)

            inner_iterations, constrained_locations_history, swing_index, average_omega_history = info
    
            for idx in swing_index:
                logger.log_status("swinging alt with id %s set to %s" % (alt_id[idx], constraint_dict[swing_cases_fix]))
                constrained_locations_matrix[index==idx] = swing_cases_fix
    
            if swing_index.size > 0:    
                info_file.write("swing of constraints found with id %s \n" % alt_id[swing_index])
                info_file.write("outer_iteration, %i, " % i + ", ".join([str(i)]*(len(inner_iterations))) + "\n")
                info_file.write("inner_iteration, , " + ", ".join(inner_iterations) + "\n")
                info_file.write("id, sdratio, " + ", ".join(["avg_omega"]*len(inner_iterations)) + "\n")
                for idx in swing_index:
                    line = str(alt_id[idx]) + ','
                    line += str(sdratio[idx]) + ','
                    line += ",".join([str(x) for x in average_omega_history[idx,]])
                    line += "\n"
                    info_file.write(line)
    
                info_file.write("\n")
                info_file.flush()

            outer_iterations = [str(i)] * len(inner_iterations)
            prob_min = [str(probability.min())] * len(inner_iterations)
            prob_max = [str(probability.max())] * len(inner_iterations)

            pi_new = self.mnl_probabilities.get_pi(sdratio_matrix, omega, constrained_locations_matrix)

            data[:,:,-1] = ln(pi_new)
            #diagnostic output
            
            if not ma.allclose(pi, pi_new, atol=CLOSE):
                if i > 0:  #don't print this for the first iteration
                    logger.log_status("min of abs(pi(l+1) - pi(l)): %s" % absolute(pi_new - pi).min())
                    logger.log_status("max of abs(pi(l+1) - pi(l)): %s" % absolute(pi_new - pi).max())
                    logger.log_status("mean of pi(l+1) - pi(l): %s" % (pi_new - pi).mean())
                    logger.log_status('Standard Deviation pi(l+1) - pi(l): %s' % standard_deviation(pi_new - pi))
                    logger.log_status('correlation of pi(l+1) and pi(l): %s' % corr(pi_new.ravel(), pi.ravel())[0,1])

                pi = pi_new
                probability_old = probability   # keep probability of the previous loop, for statistics computation only    
            else:   #convergence criterion achieved, quitting outer loop
                logger.log_status("pi(l) == pi(l+1): Convergence criterion achieved")
    
                info_file.write("\nConstrained Locations History:\n")
                info_file.write("outer_iteration," + ",".join(outer_iterations) + "\n")
                info_file.write("inner_iteration," + ",".join(inner_iterations) + "\n")
                info_file.write("minimum_probability," + ",".join(prob_min) + "\n")
                info_file.write("maximum_probability," + ",".join(prob_max) + "\n")
                for row in range(J):
                    line = [str(x) for x in constrained_locations_history[row,]]
                    info_file.write(str(alt_id[row]) + "," + ",".join(line) + "\n")

                info_file.flush()

                info_file.write("\nDemand History:\n")
                i_str = [str(x) for x in range(i)]
                info_file.write("outer_iteration, (movers)," + ",".join(i_str) + "\n")
                #info_file.write(", ,\n")
                for row in range(J):
                    line = [str(x) for x in demand_history[row,]]
                    info_file.write(str(alt_id[row]) + "," + ",".join(line) + "\n")

                demand_history_info_criteria = [500, 100, 50, 20]
                for criterion in demand_history_info_criteria:
                    com_rows_index = where(movers <= criterion)[0]
                    info_file.write("\nDemand History for alternatives with less than or equal to %s movers in 1998:\n" % criterion)
                    i_str = [str(x) for x in range(i)]
                    info_file.write("outer_iteration, (movers)," + ",".join(i_str) + "\n")
                    #info_file.write(", movers,\n")
                    for row in com_rows_index:
                        line = [str(x) for x in demand_history[row,]]
                        info_file.write(str(alt_id[row]) + "," + ",".join(line) + "\n")

                #import pdb; pdb.set_trace()
                #export prob correlation history
                correlation_indices, prob_correlation = self.compute_prob_correlation(probability_old, probability, prob_hat, index, resources)

                info_file.write("\nCorrelation of Probabilities:\n")
                c_name = ['corr(p_ij p~_ij)', 'corr(p_ij p^_ij)', 'corr(p_ij dummy)', 'corr(p~_ij p^_ij)', 'corr(p~_ij dummy)', 'corr(p^_ij dummy)']

                info_file.write("com_id, " + ",".join(c_name) + "\n")

                #info_file.write(", ,\n")
                for row in range(correlation_indices.size):
                    line = [str(x) for x in prob_correlation[row,]]
                    info_file.write(str(alt_id[correlation_indices[row]]) + "," + ",".join(line) + "\n")

                info_file.close()

                result['pi'] = pi
                return result

        logger.end_block()
        try:
            info_file.close()
        except:
            pass

        raise RuntimeError, "max iteration reached without convergence."
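The estimation trick above appends one extra "variable" slot to data once, and refreshes it with ln(pi) on every outer iteration, so the fitted coefficient on that column (beta_ln_pi, with mu = 1/beta_ln_pi) absorbs the constraint correction. A minimal sketch of just that bookkeeping, with illustrative shapes and np.log standing in for ln:

import numpy as np

N, neqs, V = 4, 3, 2                              # observations, alternatives, original variables (illustrative)
data = np.zeros((N, neqs, V), dtype=np.float32)
pi = np.ones((N, neqs), dtype=np.float32)

# done once, while data still has V columns:
data = np.concatenate((data, np.ones((N, neqs, 1), dtype=np.float32)), axis=2)
# done on every outer iteration, after pi is recomputed:
data[:, :, -1] = np.log(pi)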
Example 27
    def run(self, data, upc_sequence, resources=None):
        CLOSE = 0.01

        self.mnl_probabilities=upc_sequence.probability_class
        self.bhhh_estimation = bhhh_mnl_estimation()

        modified_upc_sequence = UPCFactory().get_model(
            utilities=None, probabilities="opus_core.mnl_probabilities", choices=None)
        modified_upc_sequence.utility_class = upc_sequence.utility_class

        result = self.bhhh_estimation.run(data, modified_upc_sequence, resources)
        probability = modified_upc_sequence.get_probabilities()
        probability_0 = probability

        resources.check_obligatory_keys(["capacity"])

        supply = resources["capacity"]

        if not isinstance(supply, ndarray):
            supply = array(supply)
        nsupply = supply.size

        max_iter = resources.get("max_iterations", None)
        if max_iter is None:
            max_iter = 100 # default

        index = resources.get("index", None)
        if index is None:
            index = arange(nsupply)

        neqs = probability.shape[1]
        nobs = probability.shape[0]

        if index.ndim <= 1:
            index = repeat(reshape(index, (1,index.shape[0])), nobs)
        resources.merge({"index":index})

        # WARNING: THE SCALING OF DEMAND IS HARD CODED AND NEEDS TO BE MADE AN ARGUMENT
        # scale demand to represent 100% from a 0.2% sample
        demand = self.mnl_probabilities.get_demand(index, probability, nsupply)*50

        #initial calculations
        sdratio = ma.filled(supply/ma.masked_where(demand==0, demand),2.0)
        sdratio = _round(sdratio, 1.0, atol=CLOSE)
        constrained_locations = logical_and(sdratio<1.0,demand-supply>CLOSE).astype("int8")
        unconstrained_locations = 1-constrained_locations
        excess_demand = (demand-supply)*constrained_locations
        global_excess_demand = excess_demand.sum()

        sdratio_matrix = sdratio[index]
        constrained_locations_matrix = constrained_locations[index]
        constrained_ex_ante = constrained_locations_matrix
# Would like to include following print statements in debug printing
#        logger.log_status('Total demand:',demand.sum())
#        logger.log_status('Total supply:',supply.sum())
        logger.log_status('Global excess demand:',global_excess_demand)
#        logger.log_status('Constrained locations:',constrained_locations.sum())
        unconstrained_locations_matrix = unconstrained_locations[index]

#        omega = ones(nobs,type=float32)
#        pi = self.constrain_probabilities.get_pi(sdratio_matrix, omega, constrained_locations_matrix, unconstrained_locations_matrix, nobs)

        omega = self.mnl_probabilities.get_omega(probability, constrained_locations_matrix, unconstrained_locations_matrix, sdratio_matrix)
        omega = _round(omega, 1.0, CLOSE)

        print 'Num of constrained locations: ', constrained_locations.sum()
        print 'Num of unconstrained locations: ', unconstrained_locations.sum()
        print 'Min Ex Ante Constraints:',min(constrained_ex_ante.sum(axis=1))
        print 'Max Ex Ante Constraints:',max(constrained_ex_ante.sum(axis=1))
        #print 'Omega shape',omega.shape
        #print 'Omega histogram',histogram(omega,0,4,40)
        print 'Minimum Omega',min(omega)
        print 'Maximum Omega',max(omega)
        print 'Mean Omega:',mean(omega)
        print 'Median Omega:',median(omega)
        print 'Sum Omega:',omega.sum()
        print 'Standard Deviation Omega:',standard_deviation(omega)
        print 'Count of Negative Omega',(where(omega<0,1,0).sum())
        print 'Count of Omega < 1',(where(omega<1,1,0).sum())
        print 'Count of Omega > 2',(where(omega>2,1,0).sum())
        print 'Count of Omega > 4',(where(omega>4,1,0).sum())

        average_omega = self.mnl_probabilities.get_average_omega(omega, probability, index, nsupply, nobs, demand)
        average_omega=_round(average_omega, 1.0, CLOSE)

        coef_names = resources.get("coefficient_names", None)

        if coef_names is not None:
            coef_names = array(coef_names.tolist()+["ln_pi"])
            resources.merge({"coefficient_names":coef_names})

        data=concatenate((data,ones((nobs,neqs,1),dtype=float32)), axis=2)

        prev_omega = omega
        prev_constrained_locations_matrix = constrained_locations_matrix

        for i in range(max_iter):
            print
            print 'Iteration',i
            pi = self.mnl_probabilities.get_pi(sdratio_matrix, omega, constrained_locations_matrix, unconstrained_locations_matrix, nobs)
            #print 'pi shape',pi.shape
            #print 'data shape', data.shape
            #print 'min_pi',min(pi,axis=1)
            #print 'max_pi',max(pi,axis=1)
            #print 'min_data',min(data,axis=1)
            #print 'max_data',max(data,axis=1)
            data[:,:,-1] = ln(pi)
            #data = concatenate((data,(pi[:,:,newaxis])),axis=-1)

            #print 'data shape after concatenating pi', data.shape
            result = self.bhhh_estimation.run(data, modified_upc_sequence, resources)
            #print
            #print 'result',result
            probability = modified_upc_sequence.get_probabilities()
            prob_hat = ma.filled(probability / pi, 0.0)


            # HARD CODED
            # scale new_demand from 0.2% to 100%
            demand_new = self.mnl_probabilities.get_demand(index, prob_hat, nsupply)*50
            ##update supply-demand ratio
            sdratio = ma.filled(supply/ma.masked_where(demand_new==0, demand_new),2.0)
            sdratio = _round(sdratio, 1.0, CLOSE)
            sdratio_matrix = sdratio[index]

            constrained_locations = where(((average_omega*demand_new - supply) > CLOSE),1,0)
            unconstrained_locations = 1-constrained_locations
            constrained_locations_matrix = constrained_locations[index]
            unconstrained_locations_matrix = unconstrained_locations[index]
            constrained_ex_post = constrained_locations_matrix
            constrained_ex_post_not_ex_ante = where((constrained_ex_post - constrained_ex_ante)==1,1,0)
            constrained_ex_ante_not_ex_post = where((constrained_ex_post - constrained_ex_ante)==-1,1,0)

            #Assumption 5: if j belongs to constrained ex post and unconstrained ex ante, then p^i_j <= D_j / S_j
            print 'Number of individual violating Assumption 5: ', where((probability > 1 / sdratio_matrix)*constrained_ex_post_not_ex_ante)[0].size

            #Assumption 6: pi of constrained locations should be less than 1
            print 'Number of individual violating Assumption 6: ', where((probability * constrained_ex_post).sum(axis=1) >
                                                                         (prob_hat * constrained_ex_post).sum(axis=1))[0].size
            ##OR ?
            #print 'Assumption 6: ', where(pi[where(constrained_locations_matrix)] > 1)[0].size

            print 'number of constrained locations: ', constrained_locations.sum()
            print 'number of unconstrained locations: ', unconstrained_locations.sum()
            print 'Min Ex Post Constraints:',min(constrained_ex_post.sum(axis=1))
            print 'Max Ex Post Constraints:',max(constrained_ex_post.sum(axis=1))
            print 'At Least 1 Constrained Ex Ante Not Ex Post*:',where(constrained_ex_ante_not_ex_post.sum(axis=1))[0].size
            print 'At Least 1 Constrained Ex Post Not Ex Ante:',where(constrained_ex_post_not_ex_ante.sum(axis=1))[0].size

            omega = self.mnl_probabilities.get_omega(prob_hat, constrained_locations_matrix, unconstrained_locations_matrix, sdratio_matrix)
            omega = _round(omega, 1.0, CLOSE)
            #print 'Omega histogram',histogram(omega,0,4,40)
            print 'Minimum Omega',min(omega)
            print 'Maximum Omega',max(omega)
            print 'Mean Omega:',mean(omega)
            print 'Median Omega:',median(omega)
            print 'Sum Omega:',omega.sum()
            print 'Standard Deviation Omega:',standard_deviation(omega)
            print 'Count of Negative Omega',(where(omega<0,1,0).sum())
            print 'Count of Omega < 1: ',(where(omega<1,1,0).sum())
            print 'Count of Omega > 2: ',(where(omega>2,1,0).sum())
            print 'Count of Omega > 4: ',(where(omega>4,1,0).sum())

            average_omega = self.mnl_probabilities.get_average_omega(omega, prob_hat, index, nsupply, nobs, demand_new)
            average_omega = _round(average_omega, 1.0, CLOSE)
            excess_demand = (demand_new-supply)*constrained_locations
            global_excess_demand = excess_demand.sum()
            #print 'Omega [i], [i-1]',prev_omega, omega,
            #print 'Constrained locations [i], [i-1]',constrained_locations_matrix, prev_constrained_locations_matrix
            print 'Global Excess Demand',global_excess_demand
            if ma.allclose(omega, prev_omega, atol=1e-3) or not any(constrained_locations_matrix - prev_constrained_locations_matrix):
                print 'omega or constrained ex post unchanged: Convergence criterion achieved'
                break
            prev_omega = omega
            prev_constrained_locations_matrix = constrained_locations_matrix

            #if global_excess_demand < 1:
                #print 'Global excess demand < 1: Convergence criterion achieved'
                #break

        return result
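The supply/demand ratio in the example above uses masked division so that zero-demand alternatives fall back to a fixed ratio (2.0) instead of dividing by zero; a minimal NumPy sketch of that pattern, with illustrative numbers:

import numpy as np
import numpy.ma as ma

supply = np.array([10.0, 4.0, 6.0])
demand = np.array([5.0, 0.0, 12.0])
sdratio = ma.filled(supply / ma.masked_where(demand == 0, demand), 2.0)
# sdratio == [2.0, 2.0, 0.5]; the masked (zero-demand) slot takes the fill value 2.0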
Example 28
 def compute(self, dataset_pool):    
     return ln(self.get_dataset().get_attribute(self.income))
Example 29
    def run(self, data, upc_sequence, resources=None):

        self.mnl_probabilities = upc_sequence.probability_class
        self.bhhh_estimation = bhhh_mnl_estimation()

        modified_upc_sequence = UPCFactory().get_model(
            utilities=None,
            probabilities="opus_core.mnl_probabilities",
            choices=None)
        modified_upc_sequence.utility_class = upc_sequence.utility_class

        N, neqs, V = data.shape

        max_iter = resources.get("max_iterations", 100)  # default
        sc = SessionConfiguration()
        dataset_pool = sc.get_dataset_pool()
        sample_rate = dataset_pool.get_dataset("sample_rate")

        CLOSE = sc["CLOSE"]
        info_filename = sc["info_file"]
        info_filename = os.path.join('.', info_filename)
        info_file = open(info_filename, "a")
        constraint_dict = {1: 'constrained', 0: 'unconstrained'}
        swing_cases_fix = 0  #set swing alternatives to constrained (1) or unconstrained (0)
        prob_correlation = None

        choice_set = resources['_model_'].choice_set
        J = choice_set.size()
        alt_id = choice_set.get_id_attribute()
        movers = choice_set.get_attribute('movers')

        resources.check_obligatory_keys(["capacity_string"])
        supply = choice_set.get_attribute(resources["capacity_string"])

        index = resources.get("index", None)
        if index is None:  # no sampling case, alternative set is the full choice_set
            index = arange(J)
        if index.ndim <= 1:
            index = repeat(index[newaxis, :], N, axis=0)

        if resources.get('aggregate_to_dataset', None):
            aggregate_dataset = dataset_pool.get_dataset(
                resources.get('aggregate_to_dataset'))
            choice_set_aggregate_id = choice_set.get_attribute(
                aggregate_dataset.get_id_name()[0])
            index = aggregate_dataset.get_id_index(
                choice_set_aggregate_id[index].ravel()).reshape(index.shape)

            supply = aggregate_dataset.get_attribute(
                resources["capacity_string"])
            J = aggregate_dataset.size()

            movers = aggregate_dataset.get_attribute("movers")

        demand_history = movers[:, newaxis]
        resources.merge({"index": index})

        pi = ones(index.shape, dtype=float32)  #initialize pi
        #average_omega = ones(J,dtype=float32)  #initialize average_omega
        logger.start_block('Outer Loop')
        for i in range(max_iter):
            logger.log_status('Outer Loop Iteration %s' % i)

            result = self.bhhh_estimation.run(data, modified_upc_sequence,
                                              resources)
            del self.bhhh_estimation
            collect()
            self.bhhh_estimation = bhhh_mnl_estimation()

            probability = modified_upc_sequence.get_probabilities()
            if data.shape[2] == V:  #insert a placeholder for ln(pi) in data
                data = concatenate((data, ones((N, neqs, 1), dtype=float32)),
                                   axis=2)
                coef_names = resources.get("coefficient_names")
                coef_names = concatenate((coef_names, array(["ln_pi"])))
                resources.merge({"coefficient_names": coef_names})
            else:
                beta_ln_pi = result['estimators'][where(
                    coef_names == 'ln_pi')][0]
                logger.log_status("mu = 1/%s = %s" %
                                  (beta_ln_pi, 1 / beta_ln_pi))

                prob_hat = safe_array_divide(probability, pi**beta_ln_pi)
                #prob_hat = safe_array_divide(probability, pi)
                prob_hat_sum = prob_hat.sum(axis=1, dtype=float32)
                if not ma.allclose(prob_hat_sum, 1.0):
                    logger.log_status(
                        "probability doesn't sum up to 1, with minimum %s, and maximum %s"
                        % (prob_hat_sum.min(), prob_hat_sum.max()))

                    probability = normalize(prob_hat)

            demand = self.mnl_probabilities.get_demand(index, probability,
                                                       J) * 1 / sample_rate
            demand_history = concatenate((demand_history, demand[:, newaxis]),
                                         axis=1)

            sdratio = safe_array_divide(
                supply, demand, return_value_if_denominator_is_zero=2.0)
            sdratio_matrix = sdratio[index]
            ## debug info
            from numpy import histogram
            from opus_core.misc import unique
            cc = histogram(index.ravel(), unique(index.ravel()))[0]
            logger.log_status(
                "================================================================="
            )
            logger.log_status("Probability min: %s, max: %s" %
                              (probability.min(), probability.max()))
            logger.log_status("Demand min: %s, max: %s" %
                              (demand.min(), demand.max()))
            logger.log_status("sdratio min: %s, max: %s" %
                              (sdratio.min(), sdratio.max()))
            logger.log_status("demand[sdratio==sdratio.min()]=%s" %
                              demand[sdratio == sdratio.min()])
            logger.log_status("demand[sdratio==sdratio.max()]=%s" %
                              demand[sdratio == sdratio.max()])
            logger.log_status(
                "Counts of unique submarkets in alternatives min: %s, max: %s"
                % (cc.min(), cc.max()))
            logger.log_status(
                "================================================================="
            )

            constrained_locations_matrix, omega, info = self.inner_loop(
                supply,
                demand,
                probability,
                index,
                sdratio_matrix,
                J,
                max_iteration=max_iter)

            inner_iterations, constrained_locations_history, swing_index, average_omega_history = info

            for idx in swing_index:
                logger.log_status(
                    "swinging alt with id %s set to %s" %
                    (alt_id[idx], constraint_dict[swing_cases_fix]))
                constrained_locations_matrix[index == idx] = swing_cases_fix

            if swing_index.size > 0:
                info_file.write("swing of constraints found with id %s \n" %
                                alt_id[swing_index])
                info_file.write("outer_iteration, %i, " % i +
                                ", ".join([str(i)] *
                                          (len(inner_iterations))) + "\n")
                info_file.write("inner_iteration, , " +
                                ", ".join(inner_iterations) + "\n")
                info_file.write("id, sdratio, " +
                                ", ".join(["avg_omega"] *
                                          len(inner_iterations)) + "\n")
                for idx in swing_index:
                    line = str(alt_id[idx]) + ','
                    line += str(sdratio[idx]) + ','
                    line += ",".join(
                        [str(x) for x in average_omega_history[idx, ]])
                    line += "\n"
                    info_file.write(line)

                info_file.write("\n")
                info_file.flush()

            outer_iterations = [str(i)] * len(inner_iterations)
            prob_min = [str(probability.min())] * len(inner_iterations)
            prob_max = [str(probability.max())] * len(inner_iterations)

            pi_new = self.mnl_probabilities.get_pi(
                sdratio_matrix, omega, constrained_locations_matrix)

            data[:, :, -1] = ln(pi_new)
            #diagnostic output

            if not ma.allclose(pi, pi_new, atol=CLOSE):
                if i > 0:  #don't print this for the first iteration
                    logger.log_status("min of abs(pi(l+1) - pi(l)): %s" %
                                      absolute(pi_new - pi).min())
                    logger.log_status("max of abs(pi(l+1) - pi(l)): %s" %
                                      absolute(pi_new - pi).max())
                    logger.log_status("mean of pi(l+1) - pi(l): %s" %
                                      (pi_new - pi).mean())
                    logger.log_status(
                        'Standard Deviation pi(l+1) - pi(l): %s' %
                        standard_deviation(pi_new - pi))
                    logger.log_status('correlation of pi(l+1) and pi(l): %s' %
                                      corr(pi_new.ravel(), pi.ravel())[0, 1])

                pi = pi_new
                probability_old = probability  # keep probability of the previous loop, for statistics computation only
            else:  #convergence criterion achieved, quitting outer loop
                logger.log_status(
                    "pi(l) == pi(l+1): Convergence criterion achieved")

                info_file.write("\nConstrained Locations History:\n")
                info_file.write("outer_iteration," +
                                ",".join(outer_iterations) + "\n")
                info_file.write("inner_iteration," +
                                ",".join(inner_iterations) + "\n")
                info_file.write("minimum_probability," + ",".join(prob_min) +
                                "\n")
                info_file.write("maximum_probability," + ",".join(prob_max) +
                                "\n")
                for row in range(J):
                    line = [
                        str(x) for x in constrained_locations_history[row, ]
                    ]
                    info_file.write(
                        str(alt_id[row]) + "," + ",".join(line) + "\n")

                info_file.flush()

                info_file.write("\nDemand History:\n")
                i_str = [str(x) for x in range(i)]
                info_file.write("outer_iteration, (movers)," +
                                ",".join(i_str) + "\n")
                #info_file.write(", ,\n")
                for row in range(J):
                    line = [str(x) for x in demand_history[row, ]]
                    info_file.write(
                        str(alt_id[row]) + "," + ",".join(line) + "\n")

                demand_history_info_criteria = [500, 100, 50, 20]
                for criterion in demand_history_info_criteria:
                    com_rows_index = where(movers <= criterion)[0]
                    info_file.write(
                        "\nDemand History for alternatives with less than or equal to %s movers in 1998:\n"
                        % criterion)
                    i_str = [str(x) for x in range(i)]
                    info_file.write("outer_iteration, (movers)," +
                                    ",".join(i_str) + "\n")
                    #info_file.write(", movers,\n")
                    for row in com_rows_index:
                        line = [str(x) for x in demand_history[row, ]]
                        info_file.write(
                            str(alt_id[row]) + "," + ",".join(line) + "\n")

                #import pdb; pdb.set_trace()
                #export prob correlation history
                correlation_indices, prob_correlation = self.compute_prob_correlation(
                    probability_old, probability, prob_hat, index, resources)

                info_file.write("\nCorrelation of Probabilities:\n")
                c_name = [
                    'corr(p_ij p~_ij)', 'corr(p_ij p^_ij)', 'corr(p_ij dummy)',
                    'corr(p~_ij p^_ij)', 'corr(p~_ij dummy)',
                    'corr(p^_ij dummy)'
                ]

                info_file.write("com_id, " + ",".join(c_name) + "\n")

                #info_file.write(", ,\n")
                for row in range(correlation_indices.size):
                    line = [str(x) for x in prob_correlation[row, ]]
                    info_file.write(
                        str(alt_id[correlation_indices[row]]) + "," +
                        ",".join(line) + "\n")

                info_file.close()

                result['pi'] = pi
                return result

        logger.end_block()
        try:
            info_file.close()
        except:
            pass

        raise RuntimeError, "max iteration reached without convergence."
Example 30
 def compute(self, dataset_pool):
     return ln(self.get_dataset().get_attribute(self.comm_den) + 1) / 10
Example 31
    def run(self, data, upc_sequence, resources=None):
        CLOSE = 0.01

        self.mnl_probabilities=upc_sequence.probability_class
        self.bhhh_estimation = bhhh_mnl_estimation()

        modified_upc_sequence = UPCFactory().get_model(
            utilities=None, probabilities="opus_core.mnl_probabilities", choices=None)
        modified_upc_sequence.utility_class = upc_sequence.utility_class

        result = self.bhhh_estimation.run(data, modified_upc_sequence, resources)
        probability = modified_upc_sequence.get_probabilities()
        probability_0 = probability

        resources.check_obligatory_keys(["capacity"])

        supply = resources["capacity"]

        if not isinstance(supply, ndarray):
            supply = array(supply)
        nsupply = supply.size

        max_iter = resources.get("max_iterations", None)
        if max_iter is None:
            max_iter = 100 # default

        index = resources.get("index", None)
        if index is None:
            index = arange(nsupply)

        neqs = probability.shape[1]
        nobs = probability.shape[0]

        if index.ndim <= 1:
            # tile the alternative index across observations (repeat along axis 0)
            index = repeat(reshape(index, (1, index.shape[0])), nobs, axis=0)
        resources.merge({"index":index})

        # WARNING: THE SCALING OF DEMAND IS HARD CODED AND NEEDS TO BE MADE AN ARGUMENT
        # scale demand to represent 100% from a 0.2% sample
        demand = self.mnl_probabilities.get_demand(index, probability, nsupply)*50
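        # note: a factor of 50 scales a 2% sample to 100% (1 / 0.02); a 0.2% sample would require a factor of 500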

        #initial calculations
        sdratio = ma.filled(supply/ma.masked_where(demand==0, demand),2.0)
        sdratio = _round(sdratio, 1.0, atol=CLOSE)
        constrained_locations = logical_and(sdratio<1.0,demand-supply>CLOSE).astype("int8")
        unconstrained_locations = 1-constrained_locations
        excess_demand = (demand-supply)*constrained_locations
        global_excess_demand = excess_demand.sum()

        sdratio_matrix = sdratio[index]
        constrained_locations_matrix = constrained_locations[index]
        constrained_ex_ante = constrained_locations_matrix
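        # per-observation (ex ante) constraint flags, obtained by indexing the per-location flags with the index matrix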
# Would like to include following print statements in debug printing
#        logger.log_status('Total demand:',demand.sum())
#        logger.log_status('Total supply:',supply.sum())
        logger.log_status('Global excess demand:',global_excess_demand)
#        logger.log_status('Constrained locations:',constrained_locations.sum())
        unconstrained_locations_matrix = unconstrained_locations[index]

#        omega = ones(nobs,type=float32)
#        pi = self.constrain_probabilities.get_pi(sdratio_matrix, omega, constrained_locations_matrix, unconstrained_locations_matrix, nobs)

        omega = self.mnl_probabilities.get_omega(probability, constrained_locations_matrix, unconstrained_locations_matrix, sdratio_matrix)
        omega = _round(omega, 1.0, CLOSE)

        print 'Num of constrained locations: ', constrained_locations.sum()
        print 'Num of unconstrained locations: ', unconstrained_locations.sum()
        print 'Min Ex Ante Constraints:',min(constrained_ex_ante.sum(axis=1))
        print 'Max Ex Ante Constraints:',max(constrained_ex_ante.sum(axis=1))
        #print 'Omega shape',omega.shape
        #print 'Omega histogram',histogram(omega,0,4,40)
        print 'Minimum Omega',min(omega)
        print 'Maximum Omega',max(omega)
        print 'Mean Omega:',mean(omega)
        print 'Median Omega:',median(omega)
        print 'Sum Omega:',omega.sum()
        print 'Standard Deviation Omega:',standard_deviation(omega)
        print 'Count of Negative Omega',(where(omega<0,1,0).sum())
        print 'Count of Omega < 1',(where(omega<1,1,0).sum())
        print 'Count of Omega > 2',(where(omega>2,1,0).sum())
        print 'Count of Omega > 4',(where(omega>4,1,0).sum())

        average_omega = self.mnl_probabilities.get_average_omega(omega, probability, index, nsupply, nobs, demand)
        average_omega=_round(average_omega, 1.0, CLOSE)

        coef_names = resources.get("coefficient_names", None)

        if coef_names is not None:
            coef_names = array(coef_names.tolist()+["ln_pi"])
            resources.merge({"coefficient_names":coef_names})

        data=concatenate((data,ones((nobs,neqs,1),dtype=float32)), axis=2)
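        # the column of ones appended above is overwritten with ln(pi) on every outer iteration below
        # (data[:,:,-1] = ln(pi)), so its coefficient ("ln_pi") absorbs the shadow-price term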

        prev_omega = omega
        prev_constrained_locations_matrix = constrained_locations_matrix

        for i in range(max_iter):
            print
            print 'Iteration',i
            pi = self.mnl_probabilities.get_pi(sdratio_matrix, omega, constrained_locations_matrix, unconstrained_locations_matrix, nobs)
            #print 'pi shape',pi.shape
            #print 'data shape', data.shape
            #print 'min_pi',min(pi,axis=1)
            #print 'max_pi',max(pi,axis=1)
            #print 'min_data',min(data,axis=1)
            #print 'max_data',max(data,axis=1)
            data[:,:,-1] = ln(pi)
            #data = concatenate((data,(pi[:,:,newaxis])),axis=-1)

            #print 'data shape after contatenating pi', data.shape
            result = self.bhhh_estimation.run(data, modified_upc_sequence, resources)
            #print
            #print 'result',result
            probability = modified_upc_sequence.get_probabilities()
            prob_hat = ma.filled(probability / pi, 0.0)


            # HARD CODED
            # scale new_demand from 0.2% to 100%
            demand_new = self.mnl_probabilities.get_demand(index, prob_hat, nsupply)*50
            ##update supply-demand ratio
            sdratio = ma.filled(supply/ma.masked_where(demand_new==0, demand_new),2.0)
            sdratio = _round(sdratio, 1.0, CLOSE)
            sdratio_matrix = sdratio[index]

            constrained_locations = where(((average_omega*demand_new - supply) > CLOSE),1,0)
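            # a location is flagged constrained ex post when omega-weighted demand exceeds supply by more than CLOSE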
            unconstrained_locations = 1-constrained_locations
            constrained_locations_matrix = constrained_locations[index]
            unconstrained_locations_matrix = unconstrained_locations[index]
            constrained_ex_post = constrained_locations_matrix
            constrained_ex_post_not_ex_ante = where((constrained_ex_post - constrained_ex_ante)==1,1,0)
            constrained_ex_ante_not_ex_post = where((constrained_ex_post - constrained_ex_ante)==-1,1,0)

            #Assumption 5: if j belongs to constrained ex post and unconstrained ex ante, then p^i_j <= D_j / S_j
            print 'Number of individuals violating Assumption 5: ', where((probability > 1 / sdratio_matrix)*constrained_ex_post_not_ex_ante)[0].size

            #Assumption 6: pi of constrained locations should be less than 1
            print 'Number of individuals violating Assumption 6: ', where((probability * constrained_ex_post).sum(axis=1) >
                                                                          (prob_hat * constrained_ex_post).sum(axis=1))[0].size
            ##OR ?
            #print 'Assumption 6: ', where(pi[where(constrained_locations_matrix)] > 1)[0].size

            print 'Number of constrained locations: ', constrained_locations.sum()
            print 'Number of unconstrained locations: ', unconstrained_locations.sum()
            print 'Min Ex Post Constraints:',min(constrained_ex_post.sum(axis=1))
            print 'Max Ex Post Constraints:',max(constrained_ex_post.sum(axis=1))
            print 'At Least 1 Constrained Ex Ante Not Ex Post*:',where(constrained_ex_ante_not_ex_post.sum(axis=1))[0].size
            print 'At Least 1 Constrained Ex Post Not Ex Ante:',where(constrained_ex_post_not_ex_ante.sum(axis=1))[0].size

            omega = self.mnl_probabilities.get_omega(prob_hat, constrained_locations_matrix, unconstrained_locations_matrix, sdratio_matrix)
            omega = _round(omega, 1.0, CLOSE)
            #print 'Omega histogram',histogram(omega,0,4,40)
            print 'Minimum Omega',min(omega)
            print 'Maximum Omega',max(omega)
            print 'Mean Omega:',mean(omega)
            print 'Median Omega:',median(omega)
            print 'Sum Omega:',omega.sum()
            print 'Standard Deviation Omega:',standard_deviation(omega)
            print 'Count of Negative Omega',(where(omega<0,1,0).sum())
            print 'Count of Omega < 1: ',(where(omega<1,1,0).sum())
            print 'Count of Omega > 2: ',(where(omega>2,1,0).sum())
            print 'Count of Omega > 4: ',(where(omega>4,1,0).sum())

            average_omega = self.mnl_probabilities.get_average_omega(omega, prob_hat, index, nsupply, nobs, demand_new)
            average_omega = _round(average_omega, 1.0, CLOSE)
            excess_demand = (demand_new-supply)*constrained_locations
            global_excess_demand = excess_demand.sum()
            #print 'Omega [i], [i-1]',prev_omega, omega,
            #print 'Constrained locations [i], [i-1]',constrained_locations_matrix, prev_constrained_locations_matrix
            print 'Global Excess Demand',global_excess_demand
            if ma.allclose(omega, prev_omega, atol=1e-3) or not any(constrained_locations_matrix - prev_constrained_locations_matrix):
                print 'Omega or constrained locations unchanged: convergence criterion achieved'
                break

            #if global_excess_demand < 1:
                #print 'Global excess demand < 1: Convergence criterion achieved'
                #break

            # carry this iteration's values forward for the next convergence check
            prev_omega = omega
            prev_constrained_locations_matrix = constrained_locations_matrix

        return result
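The heart of the loop above is the shadow-price column: a column of ones is appended to the estimation data once (so a "ln_pi" coefficient exists), and on every outer iteration that last column is overwritten with ln(pi) before the BHHH estimation is re-run. A minimal numpy sketch of just that mechanism, with toy dimensions and hypothetical pi values:

import numpy as np

nobs, neqs, nvars = 4, 3, 2                                    # toy dimensions
data = np.zeros((nobs, neqs, nvars), dtype=np.float32)         # stand-in for the estimation data

# append the placeholder column that will carry the ln(pi) shadow-price term
data = np.concatenate((data, np.ones((nobs, neqs, 1), dtype=np.float32)), axis=2)

pi = np.full((nobs, neqs), 0.9, dtype=np.float32)              # hypothetical adjustment factors
data[:, :, -1] = np.log(pi)                                    # overwritten anew on each outer iteration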
Esempio n. 32
0
 def compute(self, dataset_pool):
     return ln(self.get_dataset().get_attribute(self.dep_variable))
Esempio n. 33
0
 def compute(self, dataset_pool): 
     return ln(self.get_dataset().get_attribute(self.house_den) + 1) / 10
Esempio n. 34
0
 def compute(self, dataset_pool):    
     return ln(self.get_dataset().get_attribute(self.income))
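The density variables above take ln(x + 1) (scaled by 1/10), while this income variable takes a plain ln(x); the + 1 keeps the transform finite when the attribute can be zero. A quick illustration, assuming numpy and not taken from the source:

import numpy as np

den = np.array([0.0, 5.0, 20.0])
print(np.log(den + 1) / 10)      # finite everywhere, as in the density variables
print(np.log(den))               # -inf for the zero entry, which plain ln cannot handle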