Example #1
 def match_agent_attribute_to_choice(self, name, dataset_pool=None):
     """ Return a tuple where the first element is a 2D array of the attribute 'name_{postfix}'. 
     It is assumed to be an attribute
     of dataset1 (possibly computed). {postfix} is created either by values of the attribute
     'name' of dataset2 (if it has any such attribute), or by the id values of dataset2.
     The second value of the resulting tuple is a list of dependent variables.
     """
     if 'name' in self.get_dataset(2).get_known_attribute_names():
         name_postfix = self.get_attribute_of_dataset('name', 2)
     else:
         name_postfix = self.get_id_attribute_of_dataset(2)
     name_postfix_alt = self.get_id_attribute_of_dataset(2)
     
     dependencies = []
     for i in range(self.get_reduced_m()):
         full_name = VariableName("%s_%s" % (name, name_postfix[i]))
         if full_name.get_dataset_name() is None:
             full_name = VariableName("%s.%s" % (self.get_dataset(1).get_dataset_name(), full_name.get_expression()))
         try:
             self.get_dataset(1).compute_variables(full_name, dataset_pool=dataset_pool)
         except:  # fall back to the id-based postfix if the variable cannot be computed
             full_name = VariableName("%s_%s" % (name, name_postfix_alt[i]))
             if full_name.get_dataset_name() is None:
                 full_name = VariableName("%s.%s" % (self.get_dataset(1).get_dataset_name(), full_name.get_expression()))
             self.get_dataset(1).compute_variables(full_name, dataset_pool=dataset_pool)
         
         dependencies.append(full_name.get_expression())
         if i == 0:
             result = self.get_attribute(full_name)
         else:
             result[:,i] = self.get_attribute_of_dataset(full_name, 1)
     return result, dependencies
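
The per-column naming scheme above is simple string assembly. A standalone sketch, with a hypothetical base name and postfix values standing in for dataset 2:

    # Sketch only: the base name and postfix values below are hypothetical.
    base = "is_near_arterial"            # attribute base name on dataset 1
    postfixes = [1, 2, 3]                # id (or 'name') values of dataset 2
    full_names = ["%s_%s" % (base, p) for p in postfixes]
    print(full_names)  # ['is_near_arterial_1', 'is_near_arterial_2', 'is_near_arterial_3']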
 def run(self, year, condition=None, max_iter=10):
     """
     'year' is the current year of the simulation.
     'condition' should be a boolean expression defined on any dataset.
     The method iterates over the given models until all values of the expression are True. 
     'max_iter' gives the maximum number of iterations to run if 'condition' is not fulfilled.
     If 'max_iter' is None, there is no limit, and the condition must be fulfilled for the method to terminate.
     If 'condition' is None, the set of models is run only once.
     """
     self.config['years'] = (year, year)
     if condition is None:
         return self.model_system.run_in_same_process(self.config)
     dataset_pool = SessionConfiguration().get_dataset_pool()
     variable_name = VariableName(condition)
     dataset = dataset_pool.get_dataset(variable_name.get_dataset_name())
     condition_value = dataset.compute_variables(variable_name, dataset_pool=dataset_pool)
     result = None
     iteration = 1
     while not alltrue(condition_value):
         result = self.model_system.run_in_same_process(self.config)
         if max_iter is not None and iteration > max_iter:
             break
         iteration += 1
         # force recomputation of the condition
         dataset = SessionConfiguration().get_dataset_pool().get_dataset(variable_name.get_dataset_name())
         dataset.delete_computed_attributes()
         condition_value = dataset.compute_variables(variable_name, 
                                                     dataset_pool=SessionConfiguration().get_dataset_pool())
     if not alltrue(condition_value):
         logger.log_status('%s did not converge. Maximum number of iterations (%s) reached.' % (self.model_name, max_iter))
     else:
         logger.log_status('%s converged in %s iterations.' % (self.model_name, iteration - 1))
     return result
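
The iterate-until-condition pattern in run() can be reproduced in isolation. A minimal sketch with a toy 'model' step in place of model_system.run_in_same_process; all numbers are hypothetical, and (values >= 1.0).all() plays the role of alltrue(condition_value):

    from numpy import array

    values = array([0.2, 0.5, 0.9])        # quantity monitored by 'condition'
    max_iter, iteration = 10, 1
    while not (values >= 1.0).all():       # 'condition' analogue
        values = values + 0.3              # one 'model run'
        if max_iter is not None and iteration > max_iter:
            break
        iteration += 1
    print(iteration - 1)                   # mirrors the 'converged in N iterations' message above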
Example #3
 def check_parse_errors(self, variables):
     # check the variables in the expression library as indexed by the list 'variables'.
     errors = []
     for (var_name, dataset_name, use, source, expr)  in variables:
         # special case -- the 'constant' expression always passes
         if expr.strip()=='constant' and var_name=='constant':
             continue
         try:
             n = VariableName(expr)
             # check that the expression is of the correct form given the source
             if source=='primary attribute':
                 if n.get_autogen_class() is not None:
                     errors.append("Error - this is parsing as an expression rather than as a primary attribute: (%s, %s): %s" % (var_name, dataset_name, expr))
                 elif n.get_dataset_name() is None:
                     errors.append("Error in primary attribute - missing dataset name: (%s, %s): %s" % (var_name, dataset_name, expr))
                 elif dataset_name!=n.get_dataset_name():
                     errors.append("Error in primary attribute - dataset name mismatch: (%s, %s): %s" % (var_name, dataset_name, expr))
                 elif n.get_package_name() is not None:
                     errors.append("Error in primary attribute - shouldn't have package name: (%s, %s): %s" % (var_name, dataset_name, expr))
             elif source=='expression':
                 if n.get_autogen_class() is None:
                     errors.append("Error - this doesn't seem to be an expression.  Maybe it should be a Python class or primary attribute?: (%s, %s): %s" % (var_name, dataset_name, expr))
             elif source=='Python class':
                 if n.get_autogen_class() is not None:
                     errors.append("Error - this is parsing as an expression rather than as a Python class reference: (%s, %s): %s" % (var_name, dataset_name, expr))
                 elif n.get_package_name() is None:
                     errors.append("Error - missing package name in Python class reference: (%s, %s): %s" % (var_name, dataset_name, expr))
                 elif n.get_dataset_name() is None:
                     errors.append("Error - missing dataset name in Python class reference: (%s, %s): %s" % (var_name, dataset_name, expr))
                 elif dataset_name!=n.get_dataset_name():
                     errors.append("Error - dataset name  mismatch in Python class reference: (%s, %s): %s" % (var_name, dataset_name, expr))
             else:
                 errors.append("Unknown source type %s: (%s, %s): %s" % (source, var_name, dataset_name, expr))
         except (SyntaxError, ValueError) as e:
             errors.append("Parsing error: (%s, %s): %s" % (var_name, dataset_name, str(e)))
     return errors
 def create_and_check_qualified_variable_name(self, name):
     """Convert name to a VariableName if it isn't already, and add dataset_name to
     the VariableName if it is missing.  If it already has a dataset_name, make sure
     it is the same as the name of this dataset.
     """
     if isinstance(name, VariableName):
         vname = name
     else:
         vname = VariableName(name)
     if vname.get_dataset_name() is None:
         vname.set_dataset_name(self.get_dataset_name())
     else:
         self._check_dataset_name(vname.get_dataset_name())
         
     return vname
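
A short sketch of the qualification behavior this helper relies on, assuming an opus_core installation is importable; 'households' is a hypothetical dataset name:

    from opus_core.variables.variable_name import VariableName

    vname = VariableName("income")             # bare attribute: no dataset name yet
    assert vname.get_dataset_name() is None
    vname.set_dataset_name("households")       # what the helper fills in
    assert vname.get_dataset_name() == "households"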
 def test_fully_qualified_DDD_SSS_variable(self):
     # this should use the test variable a_test_SSS_variable_DDD_SSS
     expr = "opus_core.tests.a_test_squid_variable_42_clam"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
         table_name='tests',
         table_data={
             "a_dependent_variable":array([1,5,10]),
             "id":array([1,3,4])
             }
         )
     dataset = Dataset(in_storage=storage, in_table_name='tests', id_name="id", dataset_name="tests")
     result = dataset.compute_variables([expr])
     should_be = array([10,50,100])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6), "Error in test_fully_qualified_DDD_SSS_variable")
     # check that the access methods for the variable all return the correct values
     name = VariableName(expr)
     self.assertEqual(name.get_package_name(), 'opus_core', msg="bad value for package")
     self.assertEqual(name.get_dataset_name(), 'tests', msg="bad value for dataset")
     self.assertEqual(name.get_short_name(), 'a_test_squid_variable_42_clam', msg="bad value for shortname")
     self.assertEqual(name.get_alias(), 'a_test_squid_variable_42_clam', msg="bad value for alias")
     self.assertEqual(name.get_autogen_class(), None, msg="bad value for autogen_class")
     # test that the variable can now also be accessed using its short name in an expression
     result2 = dataset.compute_variables(['a_test_squid_variable_42_clam'])
     self.assert_(ma.allclose(result2, should_be, rtol=1e-6), "Error in accessing a_test_squid_variable_42_clam")
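
As the test's name and expression suggest, DDD stands for a numeric part and SSS for a string part of a templated variable name. The matching can be mimicked with a regular expression; this is illustrative only, not the library's actual resolver:

    import re

    # Each SSS matches a word, each DDD matches digits, so
    # 'a_test_squid_variable_42_clam' fits the template.
    pattern = "a_test_SSS_variable_DDD_SSS".replace("DDD", r"\d+").replace("SSS", r"[a-z]+")
    assert re.match(pattern + "$", "a_test_squid_variable_42_clam")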
Example #7
 def test_unary_functions_fully_qualified_name(self):
     # this tests expressions with unary functions applied to a fully qualified name
     expr = "sqrt(opus_core.tests.a_test_variable)"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
         table_name='tests',
         table_data={
             "a_dependent_variable":array([1,5,10]),
             "id":array([1,3,4])
             }
         )
     dataset = Dataset(in_storage=storage, in_table_name='tests', id_name="id", dataset_name="tests")
     result = dataset.compute_variables([expr])
     should_be = array([3.16227766, 7.0710678, 10])
     self.assertEqual(ma.allclose(result, should_be, rtol=1e-3), True, msg="error in test_unary_functions_fully_qualified_name")
     # check that the access methods for the variable all return the correct values
     name = VariableName(expr)
     autogen = name.get_autogen_class()
     self.assert_(issubclass(autogen, Variable), msg="autogen'd class isn't a Variable")
     self.assertEqual(name.get_package_name(), None, msg="bad value for package")
     self.assertEqual(name.get_dataset_name(), 'tests', msg="bad value for dataset")
     self.assertEqual(name.get_short_name(), autogen.__name__, msg="bad value for shortname")
     self.assertEqual(name.get_alias(), autogen.__name__, msg="bad value for alias")
     # make an instance of the class and check the dependencies (since the dependent variables
     # all have fully-qualified names we don't need to associate a dataset with the variable
     # for this test)
     self.assertEqual(autogen().dependencies(), ['opus_core.tests.a_test_variable'], 
                      msg="dependencies are incorrect")
 def test_fully_qualified_variable(self):
     # this tests an expression consisting of a fully-qualified variable
     expr = "opus_core.test_agent.income_times_2"
     storage = StorageFactory().get_storage("dict_storage")
     storage.write_table(table_name="test_agents", table_data={"income": array([1, 5, 10]), "id": array([1, 3, 4])})
     dataset = Dataset(in_storage=storage, in_table_name="test_agents", id_name="id", dataset_name="test_agent")
     result = dataset.compute_variables([expr])
     should_be = array([2, 10, 20])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6), "Error in test_fully_qualified_variable")
     # check that expr is in the cache of known expressions
     # (normally we shouldn't be accessing this private field, but just this once ...)
     cache = VariableName._cache
     self.assert_(expr in cache, msg="did not find expr in cache")
     # check that the access methods for the variable all return the correct values
     name = VariableName(expr)
     self.assertEqual(name.get_package_name(), "opus_core", msg="bad value for package")
     self.assertEqual(name.get_dataset_name(), "test_agent", msg="bad value for dataset")
     self.assertEqual(name.get_short_name(), "income_times_2", msg="bad value for shortname")
     self.assertEqual(name.get_alias(), "income_times_2", msg="bad value for alias")
     self.assertEqual(name.get_autogen_class(), None, msg="bad value for autogen_class")
     # test that the variable can now also be accessed using its short name in an expression
     result2 = dataset.compute_variables(["income_times_2"])
     self.assert_(ma.allclose(result2, should_be, rtol=1e-6), "Error in accessing a_test_variable")
     # check that the cache uses the variable name with whitespace removed
     oldsize = len(cache)
     expr_with_spaces = "opus_core . test_agent. income_times_2  "
     name2 = VariableName(expr_with_spaces)
     newsize = len(cache)
     self.assertEqual(oldsize, newsize, msg="caching error")
     self.assert_(expr_with_spaces not in cache, msg="caching error")
     self.assertEqual(expr_with_spaces, name2.get_expression(), msg="caching error")
     self.assertEqual(name2.get_short_name(), "income_times_2", msg="bad value for shortname")
Example #9
    def get_attribute(self, name):
        """ Return an array of the (by the argument name) given attribute. """
        if not isinstance(name, VariableName):
            attr_name = VariableName(name)
        else:
            attr_name = name
        alias = attr_name.get_alias()
        dataset_name = attr_name.get_dataset_name()
        if not (alias in self.get_attribute_names()):
            if dataset_name == self.get_dataset(1).dataset_name:
                index = self.get_2d_index_of_dataset1()
                return self.get_dataset(1).get_attribute_by_index(
                    attr_name, index)
            if dataset_name == self.get_dataset(2).dataset_name:
                index = self.get_2d_index()
                return self.get_dataset(2).get_attribute_by_index(
                    attr_name, index)

            if alias in self.get_dataset(1).get_known_attribute_names():
                index = self.get_2d_index_of_dataset1()
                return self.get_dataset(1).get_attribute_by_index(
                    attr_name, index)
            if alias in self.get_dataset(2).get_known_attribute_names():
                index = self.get_2d_index()
                return self.get_dataset(2).get_attribute_by_index(
                    attr_name, index)
            self._raise_error(NameError, "Variable %s not found!" % alias)
        return self.attribute_boxes[alias].get_data()
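
The 2D index lookups above turn a component dataset's 1D attribute into the m x n layout of the interaction set. A numpy sketch of that indexing, with hypothetical values:

    import numpy as np

    income = np.array([1, 20, 500])                 # 1D attribute of dataset 1
    index1 = np.array([[0, 0], [1, 1], [2, 2]])     # a 2D index, one row per agent
    print(income[index1])
    # [[  1   1]
    #  [ 20  20]
    #  [500 500]]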
Example #11
 def _do_flush_dependent_variables_if_required(self):
     try:
         if not SessionConfiguration().get('flush_variables', False):
             return
     except:
         return
     from opus_core.datasets.interaction_dataset import InteractionDataset
     dataset = self.get_dataset()
     dependencies = self.get_current_dependencies()
     my_dataset_name = dataset.get_dataset_name()
     for iattr in range(len(dependencies)):  # iterate over dependent variables
         dep_item = dependencies[iattr][0]
         if isinstance(dep_item, str):
             depvar_name = VariableName(dep_item)
         else:
             # dep_item should be an instance of AttributeBox
             depvar_name = dep_item.get_variable_name()
         dataset_name = depvar_name.get_dataset_name()
         if dataset_name == my_dataset_name:
             ds = dataset
         else:
             ds = SessionConfiguration().get_dataset_from_pool(dataset_name)
             #ds = dataset_pool.get_dataset('dataset_name')
         if not isinstance(ds, InteractionDataset):
             short_name = depvar_name.get_alias()
             if short_name not in ds.get_id_name():
                 ds.flush_attribute(depvar_name)
Example #12
 def test_alias_attribute(self):
     # this tests an expression consisting of an alias for a primary attribute
     expr = "p = persons"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='tests',
                         table_data={
                             "persons": array([1, 5, 10]),
                             "id": array([1, 3, 4])
                         })
     dataset = Dataset(in_storage=storage,
                       in_table_name='tests',
                       id_name="id",
                       dataset_name="tests")
     result = dataset.compute_variables([expr])
     self.assertEqual(ma.allclose(result, [1, 5, 10], rtol=1e-7),
                      True,
                      msg="error in test_alias_attribute")
     # check that the access methods for the variable all return the correct values
     name = VariableName(expr)
     self.assertEqual(name.get_package_name(),
                      None,
                      msg="bad value for package")
     self.assertEqual(name.get_dataset_name(),
                      None,
                      msg="bad value for dataset")
     self.assert_(name.get_short_name().startswith('autogen'),
                  msg="bad value for shortname")
     self.assertEqual(name.get_alias(), 'p', msg="bad value for alias")
     self.assertNotEqual(name.get_autogen_class(),
                         None,
                         msg="bad value for autogen_class")
 def test_constants(self):
     # test an expression involving two dataset names, one of which is *_constant
     expr = "test_agent.age<=opus_constant.young_age"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
         table_name='test_agents',
         table_data={
             "age":array([30,20,60,80]),
             "id":array([1,3,4,10])
             }
         )
     storage.write_table(
         table_name='opus_constants',
         table_data={
             "young_age":array([35]),
             "opus_constant_id":array([1])
             }
         )
     dataset_pool = DatasetPool(storage=storage)
     # Test that the dataset name is correct for expr.  It should be test_agent -- opus_constant just holds constants, 
     # and is ignored as far as finding the dataset name for the expression.
     name = VariableName(expr)
     autogen = name.get_autogen_class()
     self.assertEqual(name.get_package_name(), None)
     self.assertEqual(name.get_dataset_name(), 'test_agent')
     # make an instance of the class and check the dependencies (it shouldn't depend on opus_constant)
     self.assertEqual(autogen().dependencies(), ['test_agent.age'])
     dataset = Dataset(in_storage=storage, in_table_name='test_agents', id_name="id", dataset_name="test_agent")
     result = dataset.compute_variables([expr], dataset_pool=dataset_pool)
     should_be = array( [True,True,False,False] )
     self.assertEqual( ma.allequal( result, should_be), True)
Example #14
 def test_interaction_set_component(self):
     # test a fully-qualified variable that applies to a component of an interaction set
     expr = "opus_core.test_agent.income_times_2"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
         table_name='test_agents', 
         table_data={'id': array([1, 2, 3]), 'income': array([1, 20, 500])}
         )
     storage.write_table(
         table_name='test_locations', 
         table_data={'id': array([1,2]), 'cost': array([1000, 2000])}
         )
     dataset_pool = DatasetPool(package_order=['opus_core'], storage=storage)
     test_agent_x_test_location = dataset_pool.get_dataset('test_agent_x_test_location')
     result = test_agent_x_test_location.compute_variables(expr, dataset_pool=dataset_pool)
     should_be = array([[2, 2], [40, 40], [1000, 1000]])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6), msg = "Error in " + expr)
     # test that the interaction set now has this as an attribute
     result2 = test_agent_x_test_location.get_attribute('income_times_2')
     self.assert_(ma.allclose(result2, should_be, rtol=1e-6), msg = "Error in " + expr)
     # test that the variable can now also be accessed using its short name
     result3 = test_agent_x_test_location.compute_variables(['income_times_2'])
     self.assert_(ma.allclose(result3, should_be, rtol=1e-6), msg = "Error in " + expr)
     # even though we're using this with an interaction set, the dataset name for expr
     # should be the name of the component set (since that's the only one mentioned in expr)
     name = VariableName(expr)
     self.assertEqual(name.get_dataset_name(), 'test_agent', msg="bad value for dataset")
Example #15
 def test_alias_fully_qualified_variable(self):
     expr = "x = opus_core.tests.a_test_variable"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='tests',
                         table_data={
                             "a_dependent_variable": array([1, 5, 10]),
                             "id": array([1, 3, 4])
                         })
     dataset = Dataset(in_storage=storage,
                       in_table_name='tests',
                       id_name="id",
                       dataset_name="tests")
     result = dataset.compute_variables([expr])
     should_be = array([10, 50, 100])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6),
                  "Error in test_alias_fully_qualified_variable")
     # check that the new var has x as an alias
     v = VariableName(expr)
     self.assertEqual(v.get_package_name(),
                      None,
                      msg="bad value for package_name")
     self.assertEqual(v.get_dataset_name(),
                      'tests',
                      msg="bad value for dataset_name")
     self.assert_(v.get_short_name().startswith('autogen'),
                  msg="bad value for shortname")
     self.assertEqual(v.get_alias(), 'x', msg="bad value for alias")
     self.assertNotEqual(v.get_autogen_class(),
                         None,
                         msg="bad value for autogen_class")
     # check that the alias has the correct value
     result2 = dataset.compute_variables(['x'])
     self.assert_(ma.allclose(result2, should_be, rtol=1e-6),
                  "Error in accessing a_test_variable")
Example #16
 def test_multiply(self):
     expr = 'test_agent.income*test_location.cost'
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
         table_name='test_agents', 
         table_data={'id': array([1, 2, 3]), 'income': array([1, 20, 500])}
         )
     storage.write_table(
         table_name='test_locations', 
         table_data={'id': array([1,2]), 'cost': array([1000, 2000])}
         )
     dataset_pool = DatasetPool(package_order=['opus_core'], storage=storage)
     test_agent_x_test_location = dataset_pool.get_dataset('test_agent_x_test_location')
     result = test_agent_x_test_location.compute_variables(expr, dataset_pool=dataset_pool)
     should_be = array([[1000, 2000], 
                        [20000, 40000], 
                        [500000, 1000000]])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6), msg = "Error in " + expr)
     name = VariableName(expr)
     # since the expression involves both test_agent and test_location, the dataset name should be None
     # and the interaction set names should be (test_agent, test_location) or (test_location, test_agent)
     self.assertEqual(name.get_dataset_name(), None)
     names = name.get_interaction_set_names()
     self.assertEqual(len(names),2)
     self.assert_('test_agent' in names)
     self.assert_('test_location' in names)
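
The expected values in this test follow from broadcasting: every agent is paired with every location. A standalone numpy sketch of the same arithmetic:

    import numpy as np

    income = np.array([1, 20, 500])    # 3 agents
    cost = np.array([1000, 2000])      # 2 locations
    print(income[:, np.newaxis] * cost[np.newaxis, :])
    # [[   1000    2000]
    #  [  20000   40000]
    #  [ 500000 1000000]]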
 def compute_expression(self, attribute_name):
     """Compute any expression and return its values."""
     var_name = VariableName(attribute_name)
     dataset_name = var_name.get_dataset_name()
     ds = self.get_dataset(dataset_name)
     return ds.compute_variables([var_name],
                                 dataset_pool=self.get_dataset_pool())
    def _compute_if_needed(self, name, dataset_pool, resources=None, quiet=False, version=None):
        """ Compute variable given by the argument 'name' only if this variable
        has not been computed before.
        Check first if this variable belongs to dataset1 or dataset2.
        dataset_pool holds available datasets.
        """
        if not isinstance(name, VariableName):
            variable_name = VariableName(name)
        else:
            variable_name = name
        short_name = variable_name.get_alias()

        dataset_name = variable_name.get_dataset_name()
        if dataset_name == self.get_dataset_name():
            new_version = UrbansimDataset._compute_if_needed(self, variable_name, dataset_pool, resources, quiet=quiet, version=version)
        else:
            if dataset_name == self.dataset1.get_dataset_name():
                owner_dataset = self.dataset1
#                index = self.get_2d_index_of_dataset1()
            elif dataset_name == self.dataset2.get_dataset_name():
                owner_dataset = self.dataset2
#                index = self.get_2d_index()
            else:
                self._raise_error(StandardError, "Cannot find variable '%s'\nin either dataset or in the interaction set." %
                                variable_name.get_expression())
            owner_dataset.compute_variables([variable_name], dataset_pool, resources=resources, quiet=True)
            new_version =  self.compute_variables_return_versions_and_final_value("%s = %s.disaggregate(%s.%s)" % \
                                   ( short_name, self.get_dataset_name(), owner_dataset.get_dataset_name(), short_name ),
                                   dataset_pool=dataset_pool, resources=resources, quiet=quiet )[0]
        return new_version
Example #20
 def _compute_if_needed(self, name, dataset_pool, resources=None, quiet=False, version=None):
     """ Compute variable given by the argument 'name' only if this variable
     has not been computed before.
     Check first if this variable belongs to dataset1 or dataset2.
     dataset_pool holds available datasets.
     """
     if not isinstance(name, VariableName):
         variable_name = VariableName(name)
     else:
         variable_name = name
     short_name = variable_name.get_alias()
     if (short_name in self.get_attribute_names()) and \
             self.are_dependent_variables_up_to_date(variable_name, version=version):
         return version  # nothing to be done
     dataset_name = variable_name.get_dataset_name()
     if dataset_name == self.get_dataset_name():
         new_version = self._compute_one_variable(variable_name, dataset_pool, resources)
     else:
         owner_dataset, index = self.get_owner_dataset_and_index(dataset_name)
         if owner_dataset is None:
             self._raise_error(StandardError, "Cannot find variable '%s'\nin either dataset or in the interaction set." %
                             variable_name.get_expression())
         owner_dataset.compute_variables([variable_name], dataset_pool, resources=resources, quiet=True)
         new_version = self.add_attribute(data = owner_dataset.get_attribute_by_index(variable_name, index),
             name = variable_name, metadata = AttributeType.COMPUTED)
         attribute_box = owner_dataset._get_attribute_box(variable_name)
         variable = attribute_box.get_variable_instance()
         my_attribute_box = self._get_attribute_box(variable_name)
         my_attribute_box.set_variable_instance(variable)
     return new_version
 def compute_expression(self, attribute_name):
     """Compute any expression and return its values."""
     var_name = VariableName(attribute_name)
     dataset_name = var_name.get_dataset_name()
     ds = self.get_dataset(dataset_name)
     return ds.compute_variables(
         [var_name],
         dataset_pool=self.model_system.run_year_namespace["dataset_pool"])
 def prepare_for_run(self, expressions_to_compute=None, dataset_pool=None):
     if expressions_to_compute is not None:
         if dataset_pool is None:
             dataset_pool = SessionConfiguration().get_dataset_pool()
         for expression in expressions_to_compute:
             vn = VariableName(expression)
             dataset_name = vn.get_dataset_name()
             dataset = dataset_pool[dataset_name]
             dataset.compute_variables(expression)
Example #25
 def compute_m(self, year, quantity_of_interest):
     variable_name = VariableName(quantity_of_interest)
     dataset_name = variable_name.get_dataset_name()
     for i in range(self.number_of_runs):
         ds = self._compute_variable_for_one_run(i, variable_name, dataset_name, year, self.observed_data.get_quantity_object(quantity_of_interest))
         if i == 0: # first run
             self.m = zeros((ds.size(), self.number_of_runs), dtype=float32)
             self.m_ids = ds.get_id_attribute()
         self.m[:, i] = try_transformation(ds.get_attribute(variable_name), self.transformation_pair_for_prediction[0])
Example #26
 def run(self, year, cache_directory=None):
     """The class is initialized with the appropriate configuration info from the 
     travel_model_configuration part of this config, and then copies the specified 
     UrbanSim data into files for daysim to read.
     The variables/expressions to export are defined in the node travel_model_configuration/urbansim_to_tm_variable_mapping
     of the configuration file.
     """
     if cache_directory is None:
         cache_directory = self.config['cache_directory']
     simulation_state = SimulationState()
     simulation_state.set_cache_directory(cache_directory)
     simulation_state.set_current_time(year)
     attribute_cache = AttributeCache()
     sc = SessionConfiguration(new_instance=True,
                               package_order=self.config['dataset_pool_configuration'].package_order,
                               in_storage=attribute_cache)
     dataset_pool = sc.get_dataset_pool()
     tm_config = self.config['travel_model_configuration']
     data_to_export = tm_config['urbansim_to_tm_variable_mapping']
     
     table_names = data_to_export.keys()
     variable_names = {}
     datasets = {}
     filenames = {}
     in_table_names = {}
     for table_name in table_names:
         filter_expr = data_to_export[table_name].get('__filter__', None)
         if filter_expr is not None:
             del data_to_export[table_name]['__filter__']
         out_table_name = data_to_export[table_name].get('__out_table_name__', None)
         if out_table_name is not None:
             del data_to_export[table_name]['__out_table_name__']
         else:
             out_table_name = table_name
         variables_to_export = ["%s = %s" % (alias, expression)
                                for alias, expression in data_to_export[table_name].items()]
         dataset_name = None            
         for var in variables_to_export:
             var_name = VariableName(var)
             if dataset_name is None:
                 dataset_name = var_name.get_dataset_name()
                 ds = dataset_pool.get_dataset(dataset_name)
                 
                 datasets[dataset_name] = ds
                 filenames[dataset_name] = out_table_name
                 in_table_names[dataset_name] = table_name
                 if dataset_name not in variable_names.keys():
                     variable_names[dataset_name] = []
             variable_names[dataset_name].append(var_name.get_alias())                
             ds.compute_variables([var_name], dataset_pool=dataset_pool)
         if filter_expr is not None:
             filter_idx = where(ds.compute_variables(["__filter__ = %s" % filter_expr], dataset_pool=dataset_pool)>0)[0]
             ds = DatasetSubset(ds, index = filter_idx)
             datasets[dataset_name] = ds
             
     return self._call_input_file_writer(year, datasets, in_table_names, filenames, variable_names, dataset_pool)
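
The parsing above implies a particular shape for the urbansim_to_tm_variable_mapping node. A hypothetical example consistent with that code (all table, alias, and expression names are made up):

    # '__filter__' and '__out_table_name__' are the two optional keys popped
    # before the rest is treated as 'alias: expression' pairs.
    urbansim_to_tm_variable_mapping = {
        'household': {
            '__filter__': 'household.persons > 0',
            '__out_table_name__': 'hh',
            'hhsize': 'household.persons',
            'income': 'household.income',
        },
    }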
Example #28
class ln_sampling_probability_for_bias_correction_mnl(Variable):
    """Abstract variable to be used for correcting for sampling bias when sampling alternatives.
    It is assumed to be an interaction variable. The init function gets the name of the attribute that is used 
    for weighting alternatives in the model. It doesn't need to be normalized, that is done within the function.
    """
    def __init__(self, weights_attribute):
        self.weights_attribute_name = weights_attribute
        Variable.__init__(self)

    def dependencies_to_add(self, dataset_name, package="urbansim"):
        """Will be added to the dependencies from the compute method, because before that we don't 
        know the dataset name."""
        self.weights_attribute = VariableName(
            "%s.%s.%s" % (package, dataset_name, self.weights_attribute_name))
        return [
            self.weights_attribute.get_expression(),
            "_normalized_weights_%s = %s/float(sum(%s))" %
            (self.weights_attribute_name,
             self.weights_attribute.get_expression(),
             self.weights_attribute.get_expression()),
            "_log_weights_%s = ln(%s._normalized_weights_%s)" %
            (self.weights_attribute_name,
             self.weights_attribute.get_dataset_name(),
             self.weights_attribute_name),
            "_log_1_minus_weights_%s = ln(1 - %s._normalized_weights_%s)" %
            (self.weights_attribute_name,
             self.weights_attribute.get_dataset_name(),
             self.weights_attribute_name)
        ]

    def compute(self, dataset_pool):
        ds = self.get_dataset()  # interaction dataset
        self.add_and_solve_dependencies(
            self.dependencies_to_add(ds.get_dataset(2).get_dataset_name()),
            dataset_pool)
        log_1_minus_weights = ds.get_dataset(2).get_attribute(
            "_log_1_minus_weights_%s" % self.weights_attribute_name)
        result = log_1_minus_weights.sum() - ds.get_attribute("_log_1_minus_weights_%s" % self.weights_attribute_name).sum(axis=1).reshape((ds.get_reduced_n(),1)) - \
               ds.get_attribute("_log_weights_%s" % self.weights_attribute_name) + ds.get_attribute("_log_weights_%s" % self.weights_attribute_name).sum(axis=1).reshape((ds.get_reduced_n(),1))
        return result - result.max()  # shift the values so that the maximum is zero
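
The three helper expressions registered in dependencies_to_add encode a plain normalization. A numeric sketch of what they compute, with hypothetical weights:

    import numpy as np

    w = np.array([2.0, 1.0, 1.0])     # hypothetical weights_attribute values
    p = w / float(w.sum())            # _normalized_weights_*
    log_p = np.log(p)                 # _log_weights_*
    log_1mp = np.log(1 - p)           # _log_1_minus_weights_*
    print(p, log_p, log_1mp)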
Example #30
 def compute_m(self, year, quantity_of_interest, values=None, ids=None):
     if (values is not None) and (ids is not None):
         self._get_m_from_values(values, ids)
         return
     variable_name = VariableName(quantity_of_interest)
     dataset_name = variable_name.get_dataset_name()
     for i in range(self.cache_set.size):
         ds = self._compute_variable_for_one_run(i, variable_name, dataset_name, year)
         if i == 0: # first run
             m = zeros((ds.size(), self.cache_set.size), dtype=float32)
             self.m_ids = ds.get_id_attribute()
         m[:, i] = try_transformation(ds.get_attribute(variable_name), self.transformation_pair_for_prediction[0])
     self.m = resize(average(m, axis=1), (m.shape[0], 1))
 def test_dataset_qualified_attribute(self):
     expr = "tests.persons"
     storage = StorageFactory().get_storage("dict_storage")
     storage.write_table(table_name="tests", table_data={"persons": array([1, 5, 10]), "id": array([1, 3, 4])})
     dataset = Dataset(in_storage=storage, in_table_name="tests", id_name="id", dataset_name="tests")
     result = dataset.compute_variables([expr])
     self.assertEqual(ma.allclose(result, [1, 5, 10], rtol=1e-7), True, msg="error in test_attribute")
     # check that the access methods for the variable all return the correct values
     name = VariableName(expr)
     self.assertEqual(name.get_package_name(), None, msg="bad value for package")
     self.assertEqual(name.get_dataset_name(), "tests", msg="bad value for dataset")
     self.assertEqual(name.get_short_name(), "persons", msg="bad value for shortname")
     self.assertEqual(name.get_alias(), "persons", msg="bad value for alias")
     self.assertEqual(name.get_autogen_class(), None, msg="bad value for autogen_class")
 def compute_values_from_multiple_runs(self, year, quantity_of_interest, dtype='float32', dataset_arguments={}):
     """
     'quantity_of_interest' is a variable name in its fully-qualified name.
     Return a matrix of size (dataset.size x number_of_runs), with values of the variable
     for each dataset member and run. Dataset is the one to which the 
     quantity_of_interest belongs to. 
     """
     variable_name = VariableName(quantity_of_interest)
     dataset_name = variable_name.get_dataset_name()
     for i in range(self.cache_set.size):
         ds = self._compute_variable_for_one_run(i, variable_name, dataset_name, year, dataset_arguments=dataset_arguments)
         if i == 0: # first run
             result = zeros((ds.size(), self.cache_set.size), dtype=dtype)
         result[:, i] = ds.get_attribute(variable_name)
     return result
 def _solve_dependencies(self, dataset_pool):
     dataset = self.get_dataset()
     my_dataset_name = dataset.get_dataset_name()
     dependencies_list = self.get_current_dependencies()
     for i in range(len(dependencies_list)): # compute dependent variables
         dep_item = dependencies_list[i][0]
         if isinstance(dep_item, str):
             depvar_name = VariableName(dep_item)
         else:
             depvar_name = dep_item.get_variable_name() # dep_item should be an instance of AttributeBox
         dataset_name = depvar_name.get_dataset_name()
         version = dependencies_list[i][1]
         if dataset_name == my_dataset_name:
             ds = dataset
         else:
             ds = dataset_pool.get_dataset(dataset_name)
         (new_versions, value) = ds.compute_variables_return_versions_and_final_value([(depvar_name, version)], dataset_pool)
         self.dependencies_list[i] = (ds._get_attribute_box(depvar_name), new_versions[0])
Example #35
 def _update_variable_from_fields(self):
     ''' update the variable with values from the gui widgets '''
     self.variable['name'] = str(self.leVarName.text())
     self.variable['source'] = str(self.cboVarType.currentText())
     self.variable['definition'] = str(
         self.le_var_def.document().toPlainText())
     try:
         v = VariableName(self.variable['definition'])
         dataset_name = v.get_dataset_name()
         interaction_set_names = v.get_interaction_set_names()
     except (SyntaxError, ValueError):
         MessageBox.error(
             mainwindow=self,
             text='parse error for variable',
             detailed_text='setting dataset name for this variable to <unknown>')
         dataset_name = '<unknown>'
         interaction_set_names = None
     if dataset_name is None and interaction_set_names is not None:
         # It's an interaction set.  Look up possible names in available_datasets
         names = get_available_dataset_names(self.validator.project)
         n1 = interaction_set_names[0] + '_x_' + interaction_set_names[1]
         if n1 in names:
             dataset_name = n1
         else:
             n2 = interaction_set_names[1] + '_x_' + interaction_set_names[0]
             if n2 in names:
                 dataset_name = n2
             else:
                 MessageBox.error(
                     mainwindow=self,
                     text='unable to find an interaction set in available_datasets for this variable',
                     detailed_text="tried %s and %s \nbut couldn't find either name in available_datasets \nsetting dataset_name to <unknown>" % (n1, n2))
                 dataset_name = '<unknown>'
     self.variable['dataset'] = dataset_name
     if self.rbUseModel.isChecked():
         self.variable['use'] = 'model variable'
     elif self.rbUseIndicator.isChecked():
         self.variable['use'] = 'indicator'
     else:
         self.variable['use'] = 'both'
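
The interaction-set fallback above (try a_x_b, then b_x_a) is easy to isolate. A standalone sketch with hypothetical dataset names:

    def resolve_interaction_name(pair, available_names):
        # mirrors the n1/n2 fallback in _update_variable_from_fields
        n1 = pair[0] + '_x_' + pair[1]
        n2 = pair[1] + '_x_' + pair[0]
        if n1 in available_names:
            return n1
        if n2 in available_names:
            return n2
        return None

    print(resolve_interaction_name(('test_agent', 'test_location'),
                                   ['test_agent_x_test_location']))
    # test_agent_x_test_location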
Example #36
 def compute_expression(self, attribute_name, allow_missing=False):
     """Compute any expression and return its values. 
     If allow_missing is True, the code does not break if an attribute cannot be computed
     or a dataset is missing.
     """
     var_name = VariableName(attribute_name)
     dataset_name = var_name.get_dataset_name()
     try:
         ds = self.dataset_pool.get_dataset(dataset_name)
     except FileNotFoundError:
         if allow_missing:
             return None
         raise
     try:
         return ds.compute_variables([var_name], dataset_pool=self.dataset_pool)
      except Exception:  # covers lookup, missing-file, and computation errors
         if allow_missing:
             return np.zeros(ds.size(), dtype="bool8")
         raise
 def add_prefix_to_variable_names(self, variable_names, dataset, variable_package, resources):
     """Add a prefix of 'package.dataset_name.' to variable_names from resources.
     """
     if resources is None:
         return
     if not isinstance(variable_names, list):
         variable_names = [variable_names]
     for variable_name in variable_names:
         variable_string = resources.get(variable_name, None)
         if variable_string is not None:
             variable_string_name = VariableName(variable_string)
             if (variable_string_name.get_dataset_name() is None) and \
                     (variable_string_name.get_autogen_class() is None):
                 add_string = ""
                 if variable_string_name.get_package_name() is None:
                     add_string = "%s." % variable_package
                 add_string = add_string + dataset.get_dataset_name() + "."
                 resources.merge({
                     variable_name:add_string+variable_string})
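
The prefixing rule reduces to simple string assembly. A sketch with hypothetical names:

    # 'some_var' carries neither a package nor a dataset qualifier,
    # so both get prepended.
    variable_package = "urbansim"
    dataset_name = "gridcell"
    variable_string = "some_var"
    print("%s.%s.%s" % (variable_package, dataset_name, variable_string))
    # urbansim.gridcell.some_var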
Example #39
    def get_var(self, name):
        #creates a fake dataset, required for variable resolution
        def create_fake_dataset(dataset_name):
            storage = StorageFactory().get_storage('dict_storage')

            storage.write_table(
                table_name='fake_dataset',
                table_data={
                    'id':array([], dtype='int32')
                    }
                )

            dataset = Dataset(in_storage=storage, in_table_name='fake_dataset', dataset_name=dataset_name, id_name="id")
            return dataset
        var = VariableName(name)
        dataset = var.get_dataset_name()
        try:
            return self.factory.get_variable(var, create_fake_dataset(dataset), quiet=True)
        except LookupError:
            #print "LOOKUP ERROR: " + name
            return None
 def get_all_dependencies(self):
     """Return all variables and attributes needed to compute this variable.  
     This is returned as a list of tuples where the first element is either AttributeBox or 
     VariableName of the dependent variable and the second element is the version for 
     which this variable was computed.
     """ 
     def create_fake_dataset(dataset_name):
         storage = StorageFactory().get_storage('dict_storage')
         
         storage.write_table(
             table_name='fake_dataset',
             table_data={
                 'id':array([], dtype='int32')
                 }
             )
         
         dataset = Dataset(in_storage=storage, in_table_name='fake_dataset', dataset_name=dataset_name, id_name="id")
         return dataset
     
     result_others = []
     dependencies_list = self.get_current_dependencies()
     for i in range(len(dependencies_list)):
         dep_item = dependencies_list[i][0]
         version = dependencies_list[i][1]
         isprimary = 0
         if isinstance(dep_item, str):
             depvar_name = VariableName(dep_item)
             dataset_name = depvar_name.get_dataset_name()
             var = VariableFactory().get_variable(depvar_name, create_fake_dataset(dataset_name), 
                                                            quiet=True)
             result_others = result_others + [(depvar_name, version)]                                              
         else: # dep_item should be an instance of AttributeBox
             var = dep_item.get_variable_instance()           
             result_others = result_others + [(dep_item, version)]
             isprimary = dep_item.is_primary()
             
         if (var is not None) and (not isprimary):
             res = var.get_all_dependencies()
             result_others = result_others + res
     return result_others
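The recursion above is a transitive closure over the dependency graph: each dependency contributes itself plus everything it in turn depends on. A toy, runnable version with plain strings and a dict standing in for VariableName/AttributeBox (the graph below is hypothetical):

    # Toy transitive closure mirroring get_all_dependencies; primary
    # attributes are the leaves and contribute no further dependencies.
    DEPS = {
        'zone.travel_time': ['zone.distance', 'zone.speed'],
        'zone.distance': [],   # primary attribute
        'zone.speed': [],      # primary attribute
    }

    def all_dependencies(name):
        result = []
        for dep in DEPS[name]:
            result.append(dep)
            result.extend(all_dependencies(dep))  # recurse unless primary
        return result

    print(all_dependencies('zone.travel_time'))
    # ['zone.distance', 'zone.speed']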
Example #42
 def _solve_dependencies(self, dataset_pool):
     dataset = self.get_dataset()
     my_dataset_name = dataset.get_dataset_name()
     dependencies_list = self.get_current_dependencies()
     for i in range(len(dependencies_list)):  # compute dependent variables
         dep_item = dependencies_list[i][0]
         if isinstance(dep_item, str):
             depvar_name = VariableName(dep_item)
         else:
             depvar_name = dep_item.get_variable_name()  # dep_item should be an instance of AttributeBox
         dataset_name = depvar_name.get_dataset_name()
         version = dependencies_list[i][1]
         if dataset_name == my_dataset_name:
             ds = dataset
         else:
             ds = dataset_pool.get_dataset(dataset_name)
         (new_versions,
          value) = ds.compute_variables_return_versions_and_final_value(
              [(depvar_name, version)], dataset_pool)
         self.dependencies_list[i] = (ds._get_attribute_box(depvar_name),
                                      new_versions[0])
Example #43
    def get_all_dependencies(self):
        """Return all variables and attributes needed to compute this variable.  
        This is returned as a list of tuples where the first element is either AttributeBox or 
        VariableName of the dependent variable and the second element is the version for 
        which this variable was computed.
        """
        def create_fake_dataset(dataset_name):
            storage = StorageFactory().get_storage('dict_storage')

            storage.write_table(table_name='fake_dataset',
                                table_data={'id': array([], dtype='int32')})

            dataset = Dataset(in_storage=storage,
                              in_table_name='fake_dataset',
                              dataset_name=dataset_name,
                              id_name="id")
            return dataset

        result_others = []
        dependencies_list = self.get_current_dependencies()
        for i in range(len(dependencies_list)):
            dep_item = dependencies_list[i][0]
            version = dependencies_list[i][1]
            isprimary = 0
            if isinstance(dep_item, str):
                depvar_name = VariableName(dep_item)
                dataset_name = depvar_name.get_dataset_name()
                var = VariableFactory().get_variable(
                    depvar_name, create_fake_dataset(dataset_name), quiet=True)
                result_others = result_others + [(depvar_name, version)]
            else:  # dep_item should be an instance of AttributeBox
                var = dep_item.get_variable_instance()
                result_others = result_others + [(dep_item, version)]
                isprimary = dep_item.is_primary()

            if (var is not None) and (not isprimary):
                res = var.get_all_dependencies()
                result_others = result_others + res
        return result_others
Example #44
 def test_fully_qualified_DDD_SSS_variable(self):
     # this should use the test variable a_test_SSS_variable_DDD_SSS
     expr = "opus_core.tests.a_test_squid_variable_42_clam"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='tests',
                         table_data={
                             "a_dependent_variable": array([1, 5, 10]),
                             "id": array([1, 3, 4])
                         })
     dataset = Dataset(in_storage=storage,
                       in_table_name='tests',
                       id_name="id",
                       dataset_name="tests")
     result = dataset.compute_variables([expr])
     should_be = array([10, 50, 100])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6),
                  "Error in test_fully_qualified_DDD_SSS_variable")
     # check that the access methods for the variable all return the correct values
     name = VariableName(expr)
     self.assertEqual(name.get_package_name(),
                      'opus_core',
                      msg="bad value for package")
     self.assertEqual(name.get_dataset_name(),
                      'tests',
                      msg="bad value for dataset")
     self.assertEqual(name.get_short_name(),
                      'a_test_squid_variable_42_clam',
                      msg="bad value for shortname")
     self.assertEqual(name.get_alias(),
                      'a_test_squid_variable_42_clam',
                      msg="bad value for alias")
     self.assertEqual(name.get_autogen_class(),
                      None,
                      msg="bad value for autogen_class")
     # test that the variable can now also be accessed using its short name in an expression
     result2 = dataset.compute_variables(['a_test_squid_variable_42_clam'])
     self.assert_(ma.allclose(result2, should_be, rtol=1e-6),
                  "Error in accessing a_test_squid_variable_42_clam")
 def _update_variable_from_fields(self):
     ''' update the variable with values from the gui widgets '''
     self.variable['name'] = str(self.leVarName.text())
     self.variable['source'] = str(self.cboVarType.currentText())
     self.variable['definition'] = str(self.le_var_def.document().toPlainText())
     try:
         v = VariableName(self.variable['definition'])
         dataset_name = v.get_dataset_name()
         interaction_set_names = v.get_interaction_set_names()
     except (SyntaxError, ValueError):
         MessageBox.error(mainwindow = self,
             text = 'parse error for variable',
             detailed_text = 'setting dataset name for this variable to <unknown>')
         dataset_name = '<unknown>'
         interaction_set_names = None
     if dataset_name is None and interaction_set_names is not None:
         # It's an interaction set.  Look up possible names in available_datasets
         names = get_available_dataset_names(self.validator.project)
         n1 = interaction_set_names[0] + '_x_' + interaction_set_names[1]
         if n1 in names:
             dataset_name = n1
         else:
             n2 = interaction_set_names[1] + '_x_' + interaction_set_names[0]
             if n2 in names:
                 dataset_name = n2
             else:
                 MessageBox.error(mainwindow = self,
                     text = 'unable to find an interaction set in available_datasets for this variable',
                     detailed_text = "tried %s and %s \nbut couldn't find either name in available_datasets \nsetting dataset_name to <unknown>" % (n1,n2) )
                 dataset_name = '<unknown>'
     self.variable['dataset'] = dataset_name
     if self.rbUseModel.isChecked():
         self.variable['use'] = 'model variable'
     elif self.rbUseIndicator.isChecked():
         self.variable['use'] = 'indicator'
     else:
         self.variable['use'] = 'both'
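The interaction-set fallback above tries both component orderings before giving up. The same lookup, isolated as a small runnable helper (names are hypothetical; 'available' stands in for get_available_dataset_names(project)):

    # Hedged sketch of the two-ordering lookup for interaction set names.
    def resolve_interaction_set(parts, available):
        n1 = parts[0] + '_x_' + parts[1]
        n2 = parts[1] + '_x_' + parts[0]
        if n1 in available:
            return n1
        if n2 in available:
            return n2
        return '<unknown>'  # neither ordering is a known dataset

    print(resolve_interaction_set(('household', 'gridcell'),
                                  ['gridcell_x_household']))
    # gridcell_x_household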
Example #46
 def compute_values_from_multiple_runs(self,
                                       year,
                                       quantity_of_interest,
                                       dtype='float32',
                                       dataset_arguments={}):
     """
     'quantity_of_interest' is a variable name in its fully-qualified name.
     Return a matrix of size (dataset.size x number_of_runs), with values of the variable
     for each dataset member and run. Dataset is the one to which the 
     quantity_of_interest belongs to. 
     """
     variable_name = VariableName(quantity_of_interest)
     dataset_name = variable_name.get_dataset_name()
     for i in range(self.cache_set.size):
         ds = self._compute_variable_for_one_run(
             i,
             variable_name,
             dataset_name,
             year,
             dataset_arguments=dataset_arguments)
         if i == 0:  # first run
             result = zeros((ds.size(), self.cache_set.size), dtype=dtype)
         result[:, i] = ds.get_attribute(variable_name)
     return result
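The shape convention is fixed on the first run: the matrix is allocated as (dataset.size x number_of_runs) and each run then fills one column. A plain-numpy sketch of that assembly, with hypothetical arrays standing in for per-run computed values:

    # Sketch of the column-per-run assembly used above; 'runs' stands in
    # for the values computed from each cached simulation run.
    from numpy import array, zeros

    runs = [array([1.0, 2.0, 3.0]), array([1.5, 2.5, 3.5])]  # hypothetical
    result = None
    for i, values in enumerate(runs):
        if i == 0:  # the first run fixes the matrix shape
            result = zeros((values.size, len(runs)), dtype='float32')
        result[:, i] = values
    print(result.shape)  # (3, 2)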
 def get_attribute(self, name):
     """ Return an array of the (by the argument name) given attribute. """
     if not isinstance(name, VariableName):
         attr_name = VariableName(name)
     else:
         attr_name = name
     alias = attr_name.get_alias()
     dataset_name = attr_name.get_dataset_name()
     if alias not in self.get_attribute_names():
         if dataset_name == self.get_dataset(1).dataset_name:
             index = self.get_2d_index_of_dataset1()
             return self.get_dataset(1).get_attribute_by_index(attr_name, index)
         if dataset_name == self.get_dataset(2).dataset_name:
             index = self.get_2d_index()
             return self.get_dataset(2).get_attribute_by_index(attr_name, index)
         
         if alias in self.get_dataset(1).get_known_attribute_names():
             index = self.get_2d_index_of_dataset1()
             return self.get_dataset(1).get_attribute_by_index(attr_name, index)
         if alias in self.get_dataset(2).get_known_attribute_names():
             index = self.get_2d_index()
             return self.get_dataset(2).get_attribute_by_index(attr_name, index)
         self._raise_error(NameError, "Variable %s not found!" % alias)
     return self.attribute_boxes[alias].get_data()
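The method above encodes a fixed resolution order: attributes computed on the interaction set itself win, then each component dataset is consulted, first by explicit dataset name and then by bare alias. A toy version of that ordering with dicts in place of datasets (data is hypothetical):

    # Toy resolution order for get_attribute on an interaction set:
    # local attributes first, then dataset 1, then dataset 2.
    local = {}
    dataset1_attrs = {'income': [10, 20]}
    dataset2_attrs = {'price': [5, 7]}

    def get_attribute(alias):
        for table in (local, dataset1_attrs, dataset2_attrs):
            if alias in table:
                return table[alias]
        raise NameError("Variable %s not found!" % alias)

    print(get_attribute('price'))  # [5, 7]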
 def test_alias_fully_qualified_variable(self):
     expr = "x = opus_core.tests.a_test_variable"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
         table_name='tests',
         table_data={
             "a_dependent_variable":array([1,5,10]),
             "id":array([1,3,4])
             }
         )
     dataset = Dataset(in_storage=storage, in_table_name='tests', id_name="id", dataset_name="tests")
     result = dataset.compute_variables([expr])
     should_be = array([10,50,100])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6), "Error in test_alias_fully_qualified_variable")
     # check that the new var has x as an alias
     v = VariableName(expr)
     self.assertEqual(v.get_package_name(), None, msg="bad value for package_name")
     self.assertEqual(v.get_dataset_name(), 'tests', msg="bad value for dataset_name")
     self.assert_(v.get_short_name().startswith('autogen'), msg="bad value for shortname")
     self.assertEqual(v.get_alias(), 'x', msg="bad value for alias")
     self.assertNotEqual(v.get_autogen_class(), None, msg="bad value for autogen_class")
     # check that the alias has the correct value
     result2 = dataset.compute_variables(['x'])
     self.assert_(ma.allclose(result2, should_be, rtol=1e-6), "Error in accessing a_test_variable")
    def _analyze_aggregation_method_call(self, receiver, method, args):
        same, vars = match(SUBPATTERN_AGGREGATION, args)
        if not same:
            raise ValueError, "syntax error for aggregation method call"
        arg_dict = self._get_arguments(
            ('arg1', 'arg2', 'arg3'),
            ('aggr_var', 'intermediates', 'function'), vars)
        if 'aggr_var' not in arg_dict:
            raise ValueError, "syntax error for aggregation method call (problem with argument for variable being aggregated)"
        same1, vars1 = match(SUBPATTERN_FULLY_QUALIFIED_VARIABLE_ARG,
                             arg_dict['aggr_var'])
        if same1:
            # the aggregated variable is a fully-qualified name
            pkg = vars1['package']
            dataset = vars1['dataset']
            attr = vars1['shortname']
        else:
            same2, vars2 = match(SUBPATTERN_DATASET_QUALIFIED_VARIABLE_ARG,
                                 arg_dict['aggr_var'])
            if same2:
                # the aggregated variable is a dataset-qualified name
                pkg = None
                dataset = vars2['dataset']
                attr = vars2['shortname']
            else:
                # The thing being aggregated is an expression.  Generate a new autogen variable for that expression,
                # and use the autogen variable in the aggregation call.
                subexpr = arg_dict['aggr_var']
                newvar = VariableName(parsetree_to_string(subexpr))
                pkg = None
                dataset = newvar.get_dataset_name()
                if dataset is None:
                    raise ValueError, "syntax error for aggregation method call - could not determine dataset for variable being aggregated"
                attr = newvar.get_short_name()
        if 'intermediates' in arg_dict:
            # make sure that it really is a list
            s, v = match(SUBPATTERN_LIST_ARG, arg_dict['intermediates'])
            if not s:
                raise ValueError, "syntax error for aggregation method call (list of intermediate datasets not a list?)"
            intermediates = tuple(
                self._extract_names(arg_dict['intermediates']))
        else:
            intermediates = ()
        if 'function' in arg_dict:
            # bind fcn to a string that is the name of the function, or to the string "None"
            s, v = match(SUBPATTERN_NAME_ARG, arg_dict['function'])
            if not s:
                raise ValueError, "syntax error for aggregation method call (problem with the function argument in the call)"
            fcn = v['name']
        else:
            fcn = None
        self._aggregation_calls.add(
            (receiver, method, pkg, dataset, attr, intermediates, fcn))
        quoted_intermediates = "" if len(intermediates) == 0 else quote(
            intermediates[0])
        for n in intermediates[1:]:
            quoted_intermediates = quoted_intermediates + ', ' + quote(n)
        # 'call' is a string representing the new aggregation call.  Parse it, extract the args, and then add a replacement to
        # parsetree_replacements for the old args.  We want to replace just the args and not the entire call to aggregate,
        # since the way Python represents parsetrees the whole tree may include astype and exponentiation calls, and it's simpler
        # to just replace the args part.
        call = "%s.%s(%s, %s,%s, [%s], %s)" % (
            receiver, method, quote(pkg), quote(dataset), quote(attr),
            quoted_intermediates, quote(fcn))
        (newtree, _) = self._parse_expr(call)
        s, v = match(FULL_EXPRESSION_METHOD_CALL, newtree)
        if not s:
            raise StandardError('internal error - problem generating new aggregation expression')
        self._parsetree_replacements[args] = v['args']
    def run(self,
            dataset1,
            dataset2,
            index1=None,
            index2=None,
            sample_size=10,
            weight=None,
            include_chosen_choice=False,
            with_replacement=False,
            resources=None,
            dataset_pool=None):
        """this function samples number of sample_size (scalar value) alternatives from dataset2
        for agent set specified by dataset1.
        If index1 is not None, only samples alterantives for agents with indices in index1;
        if index2 is not None, only samples alternatives from indices in index2.
        sample_size specifies number of alternatives to be sampled for each agent.
        weight, to be used as sampling weight, is either an attribute name of dataset2, or a 1d
        array of the same length as index2 or 2d array of shape (index1.size, index2.size).

        Also refer to document of interaction_dataset"""

        if dataset_pool is None:
            try:
                sc = SessionConfiguration()
                dataset_pool = sc.get_dataset_pool()
            except:
                dataset_pool = DatasetPool()

        local_resources = Resources(resources)
        local_resources.merge_if_not_None({
            "dataset1": dataset1,
            "dataset2": dataset2,
            "index1": index1,
            "index2": index2,
            "sample_size": sample_size,
            "weight": weight,
            "with_replacement": with_replacement,
            "include_chosen_choice": include_chosen_choice
        })

        local_resources.check_obligatory_keys(
            ['dataset1', 'dataset2', 'sample_size'])
        agent = local_resources["dataset1"]
        index1 = local_resources.get("index1", None)
        if index1 is None:
            index1 = arange(agent.size())
        choice = local_resources["dataset2"]
        index2 = local_resources.get("index2", None)
        if index2 is None:
            index2 = arange(choice.size())

        if index1.size == 0 or index2.size == 0:
            err_msg = "either choice size or agent size is zero, return None"
            logger.log_warning(err_msg)
            return None

        include_chosen_choice = local_resources.get("include_chosen_choice",
                                                    False)
        J = local_resources["sample_size"]
        if include_chosen_choice:
            J = J - 1

        with_replacement = local_resources.get("with_replacement")

        weight = local_resources.get("weight", None)
        if isinstance(weight, str):
            if weight in choice.get_known_attribute_names():
                weight = choice.get_attribute(weight)
                rank_of_weight = 1
            else:
                varname = VariableName(weight)
                if varname.get_dataset_name() == choice.get_dataset_name():
                    weight = choice.compute_variables(
                        weight, dataset_pool=dataset_pool)
                    rank_of_weight = 1
                elif varname.get_interaction_set_names() is not None:
                    ## weights can be an interaction variable
                    interaction_dataset = InteractionDataset(local_resources)
                    weight = interaction_dataset.compute_variables(
                        weight, dataset_pool=dataset_pool)
                    rank_of_weight = 2
                    assert (len(weight.shape) >= rank_of_weight)
                else:
                    err_msg = ("weight is neither a known attribute name "
                               "nor a simple variable from the choice dataset "
                               "nor an interaction variable: '%s'" % weight)
                    logger.log_error(err_msg)
                    raise ValueError(err_msg)
        elif isinstance(weight, ndarray):
            rank_of_weight = weight.ndim
        elif not weight:  ## weight is None or empty string
            weight = ones(index2.size)
            rank_of_weight = 1
        else:
            err_msg = "unkown weight type"
            logger.log_error(err_msg)
            raise TypeError, err_msg

        if (weight.size != index2.size) and (weight.shape[rank_of_weight - 1]
                                             != index2.size):
            if weight.shape[rank_of_weight - 1] == choice.size():
                if rank_of_weight == 1:
                    weight = take(weight, index2)
                if rank_of_weight == 2:
                    weight = take(weight, index2, axis=1)
            else:
                err_msg = "weight array size doesn't match to size of dataset2 or its index"
                logger.log_error(err_msg)
                raise ValueError, err_msg

        prob = normalize(weight)

        #chosen_choice = ones(index1.size) * UNPLACED_ID
        chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
        #index_of_placed_agent = where(greater(chosen_choice_id, UNPLACED_ID))[0]
        chosen_choice_index = choice.try_get_id_index(
            chosen_choice_id, return_value_if_not_found=UNPLACED_ID)
        chosen_choice_index_to_index2 = lookup(chosen_choice_index,
                                               index2,
                                               index_if_not_found=UNPLACED_ID)

        if rank_of_weight == 1:  # a 1d weight array means all agents share the same weights over choices
            replace = with_replacement  # sampling with or without replacement
            non_zero_counts = nonzerocounts(weight)
            if non_zero_counts < J:
                logger.log_warning(
                    "weight array doesn't have enough non-zero counts; sampling with replacement"
                )
                replace = True
            if non_zero_counts > 0:
                sampled_index = prob2dsample(
                    index2,
                    sample_size=(index1.size, J),
                    prob_array=prob,
                    exclude_index=chosen_choice_index_to_index2,
                    replace=replace,
                    return_index=True)
            else:
                # all alternatives have a zero weight
                sampled_index = zeros((index1.size, 0), dtype=DTYPE)
            #return index2[sampled_index]

        if rank_of_weight == 2:
            sampled_index = zeros((index1.size, J), dtype=DTYPE) - 1

            for i in range(index1.size):
                replace = with_replacement  # sampling with/without replacement
                i_prob = prob[i, :]
                if nonzerocounts(i_prob) < J:
                    logger.log_warning(
                        "weight array doesn't have enough non-zero counts; sampling with replacement"
                    )
                    replace = True

                #exclude_index passed to probsample_noreplace needs to be indexed to index2
                sampled_index[i, :] = probsample_noreplace(
                    index2,
                    sample_size=J,
                    prob_array=i_prob,
                    exclude_index=chosen_choice_index_to_index2[i],
                    return_index=True)
        sampling_prob = take(prob, sampled_index)
        sampled_index_within_prob = sampled_index.copy()
        sampled_index = index2[sampled_index]
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        #chosen_choice = -1 * ones(chosen_choice_index.size, dtype="int32")
        if include_chosen_choice:
            sampled_index = column_stack(
                (chosen_choice_index[:, newaxis], sampled_index))
            is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
            is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1
            #chosen_choice[where(is_chosen_choice)[0]] = where(is_chosen_choice)[1]
            ## this is necessary because prob is indexed to index2, not to the choice set (as is chosen_choice_index)
            sampling_prob_for_chosen_choices = take(
                prob, chosen_choice_index_to_index2[:, newaxis])
            ## if chosen choice chosen equals unplaced_id then the sampling prob is 0
            sampling_prob_for_chosen_choices[where(
                chosen_choice_index == UNPLACED_ID)[0], ] = 0.0
            sampling_prob = column_stack(
                [sampling_prob_for_chosen_choices, sampling_prob])

        interaction_dataset = self.create_interaction_dataset(
            dataset1, dataset2, index1, sampled_index)
        interaction_dataset.add_attribute(sampling_prob,
                                          '__sampling_probability')
        interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')

        if local_resources.get("include_mnl_bias_correction_term", False):
            if include_chosen_choice:
                sampled_index_within_prob = column_stack(
                    (chosen_choice_index_to_index2[:, newaxis],
                     sampled_index_within_prob))
            interaction_dataset.add_mnl_bias_correction_term(
                prob, sampled_index_within_prob)

        ## to get the older returns
        #sampled_index = interaction_dataset.get_2d_index()
        #chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
        #where_chosen = where(interaction_dataset.get_attribute("chosen_choice"))
        #chosen_choices[where_chosen[0]]=where_chosen[1]
        #return (sampled_index, chosen_choice)

        return interaction_dataset
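The heart of the 1d-weight branch is weighted sampling without replacement, with a forced fallback to replacement when the weights carry too few non-zero entries. A self-contained numpy sketch of that fallback logic (this is not Opus's prob2dsample, just the same guard expressed with numpy.random.choice):

    # Standalone sketch: draw J alternatives per agent from index2 with
    # probabilities 'prob', switching to replacement when fewer than J
    # weights are non-zero (otherwise choice(replace=False) would fail).
    import numpy as np

    def sample_alternatives(index2, prob, n_agents, J, with_replacement=False):
        replace = with_replacement
        if np.count_nonzero(prob) < J:
            replace = True
        out = np.empty((n_agents, J), dtype=int)
        for i in range(n_agents):
            out[i] = np.random.choice(index2, size=J, replace=replace, p=prob)
        return out

    prob = np.array([0.5, 0.5, 0.0, 0.0])
    print(sample_alternatives(np.arange(4), prob, n_agents=2, J=3))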
Example #51
class ObservedDataOneQuantity:
    """  Class for storing information about one quantity measure. It is to be grouped in 
    an object of class ObservedData.
    """
    # pairs of inverse transformations
    transformation_pairs = {"sqrt": "**2", "log":"exp", "exp": "log", "**2": "sqrt"}

    def __init__(self, variable_name, observed_data, filename=None,  transformation=None, inverse_transformation=None, 
                 filter=None, match=False, dependent_datasets={}, **kwargs):
        """  'variable_name' is a quantity about which we have data available.
        'observed_data' is of type ObservedData, it is the grouping parent. 
        'filename' is the name of file where 
        the data is stored. It can be None, if the observed_data.directory is a cache.
        'transformation' is an operation to be performed on the data (e.g. sqrt, log),
        'inverse_transformation' is the inverse function of 'transformation'. If it not given, it
        is determined automatically.
        'filter' is a variable that will be applied to both, the observed data and the simulated data.
        'match' (logical) determines if the dataset should be matched (by ids) with the simulated dataset. Elements
        that don't match are eliminated from the simulated dataset.
        'dependent_datasets' (if any) should be a dictionary of dataset_name:{'filename': filename, 'match': True|False, **kwargs}. 
        They will be added to the dataset_pool. 
        Remaining arguments are passed into DatasetFactory, thus it can contain information about how 
        to create the corresponding dataset.
        """
        self.variable_name = VariableName(variable_name)
        self.dataset_name = self.variable_name.get_dataset_name()
        dataset_pool = observed_data.get_dataset_pool()
        self.matching_datasets = {}
        
        if dataset_pool is None:
            kwargs.update({'in_storage':observed_data.get_storage(), 'in_table_name': filename})
            try:
                self.dataset = DatasetFactory().search_for_dataset(self.dataset_name, observed_data.get_package_order(), arguments=kwargs)
            except: # take generic dataset
                self.dataset = Dataset(dataset_name=self.dataset_name, **kwargs)
        else:
            self.dataset = dataset_pool.get_dataset(self.dataset_name)
        if match:
            self.add_match(self.dataset)
        for dep_dataset_name, info in dependent_datasets.iteritems():
            if dataset_pool is None:
                dataset_pool = DatasetPool(storage=observed_data.get_storage(), package_order=observed_data.get_package_order())
            info.update({'in_storage':observed_data.get_storage(), 'in_table_name': info.get('filename')})
            del info['filename']
            match = False
            if 'match' in info.keys():
                match = info['match']
                del info['match']
            try:
                dep_dataset = DatasetFactory().search_for_dataset(dep_dataset_name, observed_data.get_package_order(), arguments=info)
            except:
                dep_dataset = Dataset(dataset_name=dep_dataset_name, **info)
            dataset_pool.replace_dataset(dep_dataset_name, dep_dataset)
            if match:
                self.add_match(dep_dataset)
        if self.variable_name.get_alias() not in self.dataset.get_known_attribute_names():
            self.dataset.compute_variables([self.variable_name], dataset_pool=dataset_pool)
        if filter is not None:
            filter_values = self.dataset.compute_variables([filter], dataset_pool=dataset_pool)
            idx = where(filter_values > 0)[0]
            self.add_match(self.dataset, idx)
            self.dataset.subset_by_index(idx)
        self.transformation = transformation
        self.inverse_transformation = inverse_transformation
        if (self.transformation is not None) and (self.inverse_transformation is None):
            self.inverse_transformation = self.transformation_pairs[self.transformation]
                
    def get_values(self):
        return self.dataset.get_attribute(self.variable_name)
        
    def get_transformed_values(self):
        return try_transformation(self.get_values(), self.transformation)
        
    def get_variable_name(self):
        return self.variable_name
    
    def get_dataset(self):
        return self.dataset
    
    def get_dataset_name(self):
        return self.dataset_name
    
    def get_transformation(self):
        return self.transformation
    
    def get_transformation_pair(self):
        return (self.transformation, self.inverse_transformation)
    
    def add_match(self, dataset, index = None):
        dataset_name = dataset.get_dataset_name()
        result = zeros(dataset.size(), dtype='bool8')
        idx = index
        if index is None:
            idx = arange(dataset.size())
        result[idx] = 1
        if dataset_name in self.matching_datasets.keys():
            tmp = zeros(dataset.size(), dtype='bool8')
            tmp[dataset.get_id_index(self.matching_datasets[dataset_name])]=1
            result = result*tmp
        self.matching_datasets[dataset_name] = dataset.get_id_attribute()[where(result)]
        
    def get_matching_datasets(self):
        return self.matching_datasets
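transformation_pairs maps each supported transformation to its inverse, so observed values can be pushed through a transformation and recovered exactly. A quick round-trip check using numpy functions directly in place of Opus's try_transformation helper:

    # Round-trip check for the transformation/inverse pairs above.
    import numpy as np

    forward = {'sqrt': np.sqrt, 'log': np.log, 'exp': np.exp,
               '**2': lambda x: x ** 2}
    inverse_of = {'sqrt': '**2', 'log': 'exp', 'exp': 'log', '**2': 'sqrt'}

    values = np.array([1.0, 4.0, 9.0])
    for name in ('sqrt', 'log'):
        restored = forward[inverse_of[name]](forward[name](values))
        assert np.allclose(restored, values)
    print('all transformation pairs invert correctly')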
Example #52
            estimation_results = model_system.run_year_namespace[results_name]
            
            model_data.append({"data":data, 
                               'index_chosen':index_chosen,
                               'sampling_probability': sampling_prob,
                               'variable_names':variable_names,
                               'estimation_results':estimation_results,
                               'model_name':model_name,
                               'choice_type':options.choice_type[h][i]
                               })

            if options.market_share[h][i]:
                ms_expression = options.market_share[h][i]
                ms_variablename = VariableName(ms_expression)
        
                dataset_name = ms_variablename.get_dataset_name()
                ds = model_system.run_year_namespace[dataset_name] or model_system.run_year_namespace['datasets'][dataset_name]
                id_name = ds.get_id_name()[0]
                ds.compute_variables([ms_variablename], dataset_pool=dataset_pool)
                ms = ds.get_multiple_attributes([id_name, ms_variablename.get_alias()])
                
                market_ids = m.choice_set.compute_one_variable_with_unknown_package( id_name, dataset_pool=dataset_pool)
                market_ids_2d = market_ids[m.model_interaction.get_choice_index()]
                model_data[i].update({'market_id':market_ids_2d, 'market_share':ms})

            logger.end_block()
        training_data.append(model_data)
        
    config = xmlconfig.get_run_configuration(options.scenario_name)
    if not options.agents_index:
        agent_set = dataset_pool.get_dataset(options.agent_set)