Beispiel #1
0
class ln_sampling_probability_for_bias_correction_mnl(Variable):
    """Abstract interaction variable that corrects for sampling bias when
    alternatives are sampled.

    The constructor receives the name of the attribute used for weighting
    alternatives in the model. The weights do not have to be normalized; the
    normalization is part of the expressions built by dependencies_to_add().
    """

    def __init__(self, weights_attribute):
        self.weights_attribute_name = weights_attribute
        Variable.__init__(self)

    def dependencies_to_add(self, dataset_name, package="urbansim"):
        """Build the list of dependency expressions for the weights attribute.

        The list is added to the dependencies from within compute(), because
        the dataset name is not known earlier. As a side effect,
        self.weights_attribute is set to the VariableName built from
        'package', 'dataset_name' and the weights attribute name.
        """
        self.weights_attribute = VariableName(
            "%s.%s.%s" % (package, dataset_name, self.weights_attribute_name))
        suffix = self.weights_attribute_name
        weights_expression = self.weights_attribute.get_expression()
        owner_name = self.weights_attribute.get_dataset_name()

        normalized = "_normalized_weights_%s = %s/float(sum(%s))" % (
            suffix, weights_expression, weights_expression)
        log_weights = "_log_weights_%s = ln(%s._normalized_weights_%s)" % (
            suffix, owner_name, suffix)
        log_complement = "_log_1_minus_weights_%s = ln(1 - %s._normalized_weights_%s)" % (
            suffix, owner_name, suffix)
        return [weights_expression, normalized, log_weights, log_complement]

    def compute(self, dataset_pool):
        """Return the correction term, shifted so that its maximum is zero."""
        interaction_set = self.get_dataset()
        alternatives = interaction_set.get_dataset(2)
        self.add_and_solve_dependencies(
            self.dependencies_to_add(alternatives.get_dataset_name()), dataset_pool)

        log_complement_name = "_log_1_minus_weights_%s" % self.weights_attribute_name
        log_weights_name = "_log_weights_%s" % self.weights_attribute_name

        # Sum over all members of the second dataset (a single number).
        total_log_complement = alternatives.get_attribute(log_complement_name).sum()
        # Row sums over the interaction set, reshaped into one column so that
        # they combine with the 2D attribute below.
        row_log_complement = interaction_set.get_attribute(log_complement_name).sum(axis=1).reshape(
            (interaction_set.get_reduced_n(), 1))
        log_weights = interaction_set.get_attribute(log_weights_name)
        row_log_weights = interaction_set.get_attribute(log_weights_name).sum(axis=1).reshape(
            (interaction_set.get_reduced_n(), 1))

        result = total_log_complement - row_log_complement - log_weights + row_log_weights
        return result - result.max()
Beispiel #2
0
 def match_agent_attribute_to_choice(self, name, dataset_pool=None):
     """ Return a tuple (values, dependencies) for the attribute 'name_{postfix}'.

     The attribute is assumed to belong to dataset1 (it is computed there if
     necessary). {postfix} is taken from the attribute 'name' of dataset2 if
     dataset2 has such an attribute, otherwise from the id values of dataset2.
     If computing the variable built from the 'name' postfix fails, the
     variable built from the id postfix is tried instead; an error raised by
     that second attempt propagates to the caller.

     The first element of the result is the array obtained for the first
     postfix via self.get_attribute(), whose remaining columns are then
     overwritten with the dataset1 attributes for the other postfixes.
     The second element is the list of expressions of the variables used.

     NOTE: if get_reduced_m() returns 0 the loop body never runs and the
     final return fails because 'result' was never assigned.
     """
     if 'name' in self.get_dataset(2).get_known_attribute_names():
         name_postfix = self.get_attribute_of_dataset('name', 2)
     else:
         name_postfix = self.get_id_attribute_of_dataset(2)
     name_postfix_alt = self.get_id_attribute_of_dataset(2)

     agents = self.get_dataset(1)

     def qualified_name(postfix):
         # Build 'name_postfix' and prepend dataset1's name when the
         # expression does not already carry a dataset name.
         candidate = VariableName("%s_%s" % (name, postfix))
         if candidate.get_dataset_name() is None:
             candidate = VariableName("%s.%s" % (agents.get_dataset_name(), candidate.get_expression()))
         return candidate

     dependencies = []
     for i in range(self.get_reduced_m()):
         full_name = qualified_name(name_postfix[i])
         try:
             agents.compute_variables(full_name, dataset_pool=dataset_pool)
         except Exception:
             # Only ordinary errors trigger the fallback; KeyboardInterrupt
             # and SystemExit are no longer swallowed here.
             full_name = qualified_name(name_postfix_alt[i])
             agents.compute_variables(full_name, dataset_pool=dataset_pool)

         dependencies.append(full_name.get_expression())
         if i == 0:
             result = self.get_attribute(full_name)
         else:
             result[:,i] = self.get_attribute_of_dataset(full_name, 1)
     return result, dependencies
    def _compute_if_needed(self, name, dataset_pool, resources=None, quiet=False, version=None):
        """ Compute the variable given by 'name', dispatching on the dataset
        it belongs to.

        A variable of this (interaction) dataset is handed to
        UrbansimDataset._compute_if_needed. A variable of dataset1 or dataset2
        is first computed on its owner dataset and then brought into this
        dataset through a 'disaggregate' expression. Any other dataset name is
        reported through self._raise_error.
        dataset_pool holds available datasets.
        """
        variable_name = name if isinstance(name, VariableName) else VariableName(name)
        alias = variable_name.get_alias()
        dataset_name = variable_name.get_dataset_name()

        if dataset_name == self.get_dataset_name():
            return UrbansimDataset._compute_if_needed(
                self, variable_name, dataset_pool, resources, quiet=quiet, version=version)

        if dataset_name == self.dataset1.get_dataset_name():
            owner_dataset = self.dataset1
        elif dataset_name == self.dataset2.get_dataset_name():
            owner_dataset = self.dataset2
        else:
            message = "Cannot find variable '%s'\nin either dataset or in the interaction set." % \
                variable_name.get_expression()
            self._raise_error(StandardError, message)

        owner_dataset.compute_variables([variable_name], dataset_pool, resources=resources, quiet=True)
        disaggregation = "%s = %s.disaggregate(%s.%s)" % (
            alias, self.get_dataset_name(), owner_dataset.get_dataset_name(), alias)
        outcome = self.compute_variables_return_versions_and_final_value(
            disaggregation, dataset_pool=dataset_pool, resources=resources, quiet=quiet)
        # Only the first element of the returned pair is handed back.
        return outcome[0]
Beispiel #4
0
 def _compute_if_needed(self, name, dataset_pool, resources=None, quiet=False, version=None):
     """ Compute the variable given by 'name' unless it is already an
     attribute of this dataset whose dependent variables are up to date.

     A variable of this dataset is computed directly. Otherwise the owner
     dataset is looked up by dataset name, the variable is computed there,
     and its values (selected by the owner's index) are added to this
     dataset as a computed attribute that shares the owner's variable
     instance.
     dataset_pool holds available datasets.
     """
     variable_name = name if isinstance(name, VariableName) else VariableName(name)
     alias = variable_name.get_alias()

     if alias in self.get_attribute_names():
         if self.are_dependent_variables_up_to_date(variable_name, version=version):
             return version  # nothing to be done

     dataset_name = variable_name.get_dataset_name()
     if dataset_name == self.get_dataset_name():
         return self._compute_one_variable(variable_name, dataset_pool, resources)

     owner_dataset, index = self.get_owner_dataset_and_index(dataset_name)
     if owner_dataset is None:
         message = "Cannot find variable '%s'\nin either dataset or in the interaction set." % \
             variable_name.get_expression()
         self._raise_error(StandardError, message)

     owner_dataset.compute_variables([variable_name], dataset_pool, resources=resources, quiet=True)
     owner_values = owner_dataset.get_attribute_by_index(variable_name, index)
     new_version = self.add_attribute(data=owner_values, name=variable_name,
                                      metadata=AttributeType.COMPUTED)

     # Make this dataset's attribute box point at the variable instance that
     # the owner dataset holds.
     variable = owner_dataset._get_attribute_box(variable_name).get_variable_instance()
     self._get_attribute_box(variable_name).set_variable_instance(variable)
     return new_version
 def test_fully_qualified_variable(self):
     """Compute a fully-qualified variable, then check the VariableName
     accessors and how the expression cache treats whitespace."""
     # this tests an expression consisting of a fully-qualified variable
     expr = "opus_core.test_agent.income_times_2"
     storage = StorageFactory().get_storage("dict_storage")
     storage.write_table(table_name="test_agents", table_data={"income": array([1, 5, 10]), "id": array([1, 3, 4])})
     dataset = Dataset(in_storage=storage, in_table_name="test_agents", id_name="id", dataset_name="test_agent")
     result = dataset.compute_variables([expr])
     should_be = array([2, 10, 20])
     # assertTrue replaces the deprecated assert_ alias
     self.assertTrue(ma.allclose(result, should_be, rtol=1e-6), "Error in test_fully_qualified_variable")
     # check that expr is in the cache of known expressions
     # (normally we shouldn't be accessing this private field, but just this once ...)
     cache = VariableName._cache
     self.assertTrue(expr in cache, msg="did not find expr in cache")
     # check that the access methods for the variable all return the correct values
     name = VariableName(expr)
     self.assertEqual(name.get_package_name(), "opus_core", msg="bad value for package")
     self.assertEqual(name.get_dataset_name(), "test_agent", msg="bad value for dataset")
     self.assertEqual(name.get_short_name(), "income_times_2", msg="bad value for shortname")
     self.assertEqual(name.get_alias(), "income_times_2", msg="bad value for alias")
     self.assertEqual(name.get_autogen_class(), None, msg="bad value for autogen_class")
     # test that the variable can now also be accessed using its short name in an expression
     result2 = dataset.compute_variables(["income_times_2"])
     self.assertTrue(ma.allclose(result2, should_be, rtol=1e-6), "Error in accessing a_test_variable")
     # check that the cache uses the variable name with whitespace removed:
     # creating the name with spaces must not add a cache entry, while the
     # name object still reports the expression as it was given
     oldsize = len(cache)
     expr_with_spaces = "opus_core . test_agent. income_times_2  "
     name2 = VariableName(expr_with_spaces)
     newsize = len(cache)
     self.assertEqual(oldsize, newsize, msg="caching error")
     self.assertTrue(expr_with_spaces not in cache, msg="caching error")
     self.assertEqual(expr_with_spaces, name2.get_expression(), msg="caching error")
     self.assertEqual(name2.get_short_name(), "income_times_2", msg="bad value for shortname")
Beispiel #6
0
 def test_fully_qualified_variable(self):
     """Compute a fully-qualified variable, then check the VariableName
     accessors and how the expression cache treats whitespace."""
     # this tests an expression consisting of a fully-qualified variable
     expr = "opus_core.test_agent.income_times_2"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='test_agents',
                         table_data={
                             "income": array([1, 5, 10]),
                             "id": array([1, 3, 4])
                         })
     dataset = Dataset(in_storage=storage,
                       in_table_name='test_agents',
                       id_name="id",
                       dataset_name="test_agent")
     result = dataset.compute_variables([expr])
     should_be = array([2, 10, 20])
     # assertTrue replaces the deprecated assert_ alias
     self.assertTrue(ma.allclose(result, should_be, rtol=1e-6),
                     "Error in test_fully_qualified_variable")
     # check that expr is in the cache of known expressions
     # (normally we shouldn't be accessing this private field, but just this once ...)
     cache = VariableName._cache
     self.assertTrue(expr in cache, msg="did not find expr in cache")
     # check that the access methods for the variable all return the correct values
     name = VariableName(expr)
     self.assertEqual(name.get_package_name(),
                      'opus_core',
                      msg="bad value for package")
     self.assertEqual(name.get_dataset_name(),
                      'test_agent',
                      msg="bad value for dataset")
     self.assertEqual(name.get_short_name(),
                      'income_times_2',
                      msg="bad value for shortname")
     self.assertEqual(name.get_alias(),
                      'income_times_2',
                      msg="bad value for alias")
     self.assertEqual(name.get_autogen_class(),
                      None,
                      msg="bad value for autogen_class")
     # test that the variable can now also be accessed using its short name in an expression
     result2 = dataset.compute_variables(['income_times_2'])
     self.assertTrue(ma.allclose(result2, should_be, rtol=1e-6),
                     "Error in accessing a_test_variable")
     # check that the cache uses the variable name with whitespace removed:
     # creating the name with spaces must not add a cache entry, while the
     # name object still reports the expression as it was given
     oldsize = len(cache)
     expr_with_spaces = "opus_core . test_agent. income_times_2  "
     name2 = VariableName(expr_with_spaces)
     newsize = len(cache)
     self.assertEqual(oldsize, newsize, msg="caching error")
     self.assertTrue(expr_with_spaces not in cache, msg="caching error")
     self.assertEqual(expr_with_spaces,
                      name2.get_expression(),
                      msg="caching error")
     self.assertEqual(name2.get_short_name(),
                      'income_times_2',
                      msg="bad value for shortname")
Beispiel #7
0
class ln_sampling_probability_for_bias_correction_mnl(Variable):
    """Abstract interaction variable that corrects for sampling bias when
    alternatives are sampled.

    The constructor receives the name of the attribute used for weighting
    alternatives in the model. The weights do not have to be normalized; the
    normalization is part of the expressions built by dependencies_to_add().
    """
    def __init__(self, weights_attribute):
        self.weights_attribute_name = weights_attribute
        Variable.__init__(self)

    def dependencies_to_add(self, dataset_name, package="urbansim"):
        """Build the list of dependency expressions for the weights attribute.

        The list is added to the dependencies from within compute(), because
        the dataset name is not known earlier. As a side effect,
        self.weights_attribute is set to the VariableName built from
        'package', 'dataset_name' and the weights attribute name.
        """
        self.weights_attribute = VariableName(
            "%s.%s.%s" % (package, dataset_name, self.weights_attribute_name))
        suffix = self.weights_attribute_name
        weights_expression = self.weights_attribute.get_expression()
        owner_name = self.weights_attribute.get_dataset_name()

        dependencies = [weights_expression]
        dependencies.append("_normalized_weights_%s = %s/float(sum(%s))" %
                            (suffix, weights_expression, weights_expression))
        dependencies.append("_log_weights_%s = ln(%s._normalized_weights_%s)" %
                            (suffix, owner_name, suffix))
        dependencies.append("_log_1_minus_weights_%s = ln(1 - %s._normalized_weights_%s)" %
                            (suffix, owner_name, suffix))
        return dependencies

    def compute(self, dataset_pool):
        """Return the correction term, shifted so that its maximum is zero."""
        interaction_set = self.get_dataset()
        alternatives = interaction_set.get_dataset(2)
        self.add_and_solve_dependencies(
            self.dependencies_to_add(alternatives.get_dataset_name()),
            dataset_pool)

        log_complement_name = "_log_1_minus_weights_%s" % self.weights_attribute_name
        log_weights_name = "_log_weights_%s" % self.weights_attribute_name

        # Sum over all members of the second dataset (a single number).
        total_log_complement = alternatives.get_attribute(log_complement_name).sum()
        # Row sums over the interaction set, reshaped into one column so that
        # they combine with the 2D attribute below.
        row_log_complement = interaction_set.get_attribute(log_complement_name).sum(axis=1).reshape(
            (interaction_set.get_reduced_n(), 1))
        log_weights = interaction_set.get_attribute(log_weights_name)
        row_log_weights = interaction_set.get_attribute(log_weights_name).sum(axis=1).reshape(
            (interaction_set.get_reduced_n(), 1))

        result = total_log_complement - row_log_complement - log_weights + row_log_weights
        return result - result.max()
    def _compute_if_needed(self, name, dataset_pool, resources=None, quiet=False, version=None):
        """ Compute the variable given by 'name', dispatching on the dataset
        it belongs to.

        A variable of this (interaction) dataset is handed to
        UrbansimDataset._compute_if_needed. A variable of dataset1 or dataset2
        is first computed on its owner dataset and then brought into this
        dataset through a 'disaggregate' expression. Any other dataset name is
        reported through self._raise_error.
        dataset_pool holds available datasets.
        """
        if isinstance(name, VariableName):
            variable_name = name
        else:
            variable_name = VariableName(name)
        alias = variable_name.get_alias()
        dataset_name = variable_name.get_dataset_name()

        if dataset_name == self.get_dataset_name():
            return UrbansimDataset._compute_if_needed(
                self, variable_name, dataset_pool, resources, quiet=quiet, version=version)

        if dataset_name == self.dataset1.get_dataset_name():
            owner_dataset = self.dataset1
        elif dataset_name == self.dataset2.get_dataset_name():
            owner_dataset = self.dataset2
        else:
            message = "Cannot find variable '%s'\nin either dataset or in the interaction set." % \
                variable_name.get_expression()
            self._raise_error(StandardError, message)

        owner_dataset.compute_variables([variable_name], dataset_pool, resources=resources, quiet=True)
        expression = "%s = %s.disaggregate(%s.%s)" % (
            alias, self.get_dataset_name(), owner_dataset.get_dataset_name(), alias)
        outcome = self.compute_variables_return_versions_and_final_value(
            expression, dataset_pool=dataset_pool, resources=resources, quiet=quiet)
        # Only the first element of the returned pair is handed back.
        return outcome[0]
 def variable_dependencies(self, name):
     """Prints out dependencies of this variable.

     'name' can be either an alias from the model specification or an
     expression. The specification's variable names are searched for one
     whose alias equals 'name'; if none matches, 'name' itself is used as
     the expression. Nothing is returned.
     """
     from opus_core.variables.dependency_query import DependencyChart
     varname = None
     # Iterate over the names directly instead of indexing by position.
     for thisvar in self.get_specification().get_variable_names():
         if not isinstance(thisvar, VariableName):
             thisvar = VariableName(thisvar)
         if name == thisvar.get_alias():
             varname = thisvar
             break
     if varname is None:
         varname = VariableName(name)
     chart = DependencyChart(self.xml_configuration)
     chart.print_dependencies(varname.get_expression())
Beispiel #10
0
 def variable_dependencies(self, name):
     """Prints out dependencies of this variable.

     'name' can be either an alias from the model specification or an
     expression. The specification's variable names are searched for one
     whose alias equals 'name'; if none matches, 'name' itself is used as
     the expression. Nothing is returned.
     """
     from opus_core.variables.dependency_query import DependencyChart
     varname = None
     # Iterate over the names directly instead of indexing by position.
     for thisvar in self.get_specification().get_variable_names():
         if not isinstance(thisvar, VariableName):
             thisvar = VariableName(thisvar)
         if name == thisvar.get_alias():
             varname = thisvar
             break
     if varname is None:
         varname = VariableName(name)
     chart = DependencyChart(self.xml_configuration)
     chart.print_dependencies(varname.get_expression())