def match_agent_attribute_to_choice(self, name, dataset_pool=None):
    """ Return a tuple where the first element is a 2D array of the attribute
    'name_{postfix}'. It is assumed to be an attribute of dataset1 (possibly computed).
    {postfix} is created either from values of the attribute 'name' of dataset2
    (if it has such an attribute), or from the id values of dataset2.
    The second element of the resulting tuple is a list of dependent variables.
    """
    if 'name' in self.get_dataset(2).get_known_attribute_names():
        name_postfix = self.get_attribute_of_dataset('name', 2)
    else:
        name_postfix = self.get_id_attribute_of_dataset(2)
    name_postfix_alt = self.get_id_attribute_of_dataset(2)
    dependencies = []
    for i in range(self.get_reduced_m()):
        full_name = VariableName("%s_%s" % (name, name_postfix[i]))
        if full_name.get_dataset_name() is None:
            full_name = VariableName("%s.%s" % (self.get_dataset(1).get_dataset_name(),
                                                full_name.get_expression()))
        try:
            self.get_dataset(1).compute_variables(full_name, dataset_pool=dataset_pool)
        except:
            # fall back on an id-based postfix if the name-based variable fails to compute
            full_name = VariableName("%s_%s" % (name, name_postfix_alt[i]))
            if full_name.get_dataset_name() is None:
                full_name = VariableName("%s.%s" % (self.get_dataset(1).get_dataset_name(),
                                                    full_name.get_expression()))
            self.get_dataset(1).compute_variables(full_name, dataset_pool=dataset_pool)
        dependencies.append(full_name.get_expression())
        if i == 0:
            result = self.get_attribute(full_name)
        else:
            result[:, i] = self.get_attribute_of_dataset(full_name, 1)
    return result, dependencies
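# A minimal sketch of the naming convention the method above relies on
# (hypothetical attribute names, not from the source): if dataset2 has a
# 'name' attribute with values like 'commercial' and 'industrial', dataset1
# is expected to carry per-choice attributes built from those values, and
# the call collects them column by column.
#
#   cost_commercial = ...   # attribute (possibly computed) of dataset1
#   cost_industrial = ...
#   values, deps = interaction_set.match_agent_attribute_to_choice('cost')
#   # values has one column per choice; deps lists the expressions used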
def run(self, year, condition=None, max_iter=10):
    """ 'year' is the current year of the simulation. 'condition' should be a boolean
    expression defined on any dataset. The method iterates over the given models
    until all values of the expression are True. 'max_iter' gives the maximum number
    of iterations to run if 'condition' is not fulfilled. If it is None, there is no
    limit, and thus the condition must be fulfilled in order to terminate.
    If 'condition' is None, the set of models is run only once.
    """
    self.config['years'] = (year, year)
    if condition is None:
        return self.model_system.run_in_same_process(self.config)
    dataset_pool = SessionConfiguration().get_dataset_pool()
    variable_name = VariableName(condition)
    dataset = dataset_pool.get_dataset(variable_name.get_dataset_name())
    condition_value = dataset.compute_variables(variable_name, dataset_pool=dataset_pool)
    result = None
    iter = 1
    while not alltrue(condition_value):
        result = self.model_system.run_in_same_process(self.config)
        # no iteration limit when max_iter is None (see docstring)
        if max_iter is not None and iter > max_iter:
            break
        iter = iter + 1
        # force the condition to be recomputed on fresh data
        dataset = SessionConfiguration().get_dataset_pool().get_dataset(
            variable_name.get_dataset_name())
        dataset.delete_computed_attributes()
        condition_value = dataset.compute_variables(
            variable_name, dataset_pool=SessionConfiguration().get_dataset_pool())
    if not alltrue(condition_value):
        logger.log_status('%s did not converge. Maximum number of iterations (%s) reached.' % (
            self.model_name, max_iter))
    else:
        logger.log_status('%s converged in %s iterations.' % (self.model_name, iter - 1))
    return result
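# Hypothetical usage of run() above (model and variable names are invented,
# not from the source): re-run the configured model set for one simulation
# year until a convergence expression holds everywhere, at most 5 times.
#
#   meta_model.run(2005,
#                  condition="zone.aggregate(urbansim.gridcell.vacant_residential_units) > 0",
#                  max_iter=5)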
def check_parse_errors(self, variables):
    # check the variables in the expression library as indexed by the list 'variables'
    errors = []
    for (var_name, dataset_name, use, source, expr) in variables:
        # special case -- the 'constant' expression always passes
        if expr.strip() == 'constant' and var_name == 'constant':
            continue
        try:
            n = VariableName(expr)
            # check that the expression is of the correct form given the source
            if source == 'primary attribute':
                if n.get_autogen_class() is not None:
                    errors.append("Error - this is parsing as an expression rather than as a primary attribute: (%s, %s): %s" % (var_name, dataset_name, expr))
                elif n.get_dataset_name() is None:
                    errors.append("Error in primary attribute - missing dataset name: (%s, %s): %s" % (var_name, dataset_name, expr))
                elif dataset_name != n.get_dataset_name():
                    errors.append("Error in primary attribute - dataset name mismatch: (%s, %s): %s" % (var_name, dataset_name, expr))
                elif n.get_package_name() is not None:
                    errors.append("Error in primary attribute - shouldn't have package name: (%s, %s): %s" % (var_name, dataset_name, expr))
            elif source == 'expression':
                if n.get_autogen_class() is None:
                    errors.append("Error - this doesn't seem to be an expression. Maybe it should be a Python class or primary attribute?: (%s, %s): %s" % (var_name, dataset_name, expr))
            elif source == 'Python class':
                if n.get_autogen_class() is not None:
                    errors.append("Error - this is parsing as an expression rather than as a Python class reference: (%s, %s): %s" % (var_name, dataset_name, expr))
                elif n.get_package_name() is None:
                    errors.append("Error - missing package name in Python class reference: (%s, %s): %s" % (var_name, dataset_name, expr))
                elif n.get_dataset_name() is None:
                    errors.append("Error - missing dataset name in Python class reference: (%s, %s): %s" % (var_name, dataset_name, expr))
                elif dataset_name != n.get_dataset_name():
                    errors.append("Error - dataset name mismatch in Python class reference: (%s, %s): %s" % (var_name, dataset_name, expr))
            else:
                errors.append("Unknown source type %s: (%s, %s): %s" % (source, var_name, dataset_name, expr))
        except (SyntaxError, ValueError), e:
            errors.append("Parsing error: (%s, %s): %s" % (var_name, dataset_name, str(e)))
    return errors
def create_and_check_qualified_variable_name(self, name):
    """Convert name to a VariableName if it isn't already, and add dataset_name to
    the VariableName if it is missing. If it already has a dataset_name, make sure
    it is the same as the name of this dataset.
    """
    if isinstance(name, VariableName):
        vname = name
    else:
        vname = VariableName(name)
    if vname.get_dataset_name() is None:
        vname.set_dataset_name(self.get_dataset_name())
    else:
        self._check_dataset_name(vname.get_dataset_name())
    return vname
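# A minimal sketch of the qualification step above (assumes a Dataset built
# with the dict_storage pattern used in the tests below; attribute names are
# invented):
#
#   vname = dataset.create_and_check_qualified_variable_name('persons')
#   vname.get_dataset_name()   # -> dataset.get_dataset_name(), filled in
#
#   # a name already qualified with a *different* dataset is rejected by
#   # _check_dataset_name rather than silently rewritten:
#   dataset.create_and_check_qualified_variable_name('other_dataset.persons')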
def test_fully_qualified_DDD_SSS_variable(self):
    # this should use the test variable a_test_SSS_variable_DDD_SSS
    expr = "opus_core.tests.a_test_squid_variable_42_clam"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='tests',
        table_data={
            "a_dependent_variable": array([1, 5, 10]),
            "id": array([1, 3, 4])
        }
    )
    dataset = Dataset(in_storage=storage, in_table_name='tests', id_name="id", dataset_name="tests")
    result = dataset.compute_variables([expr])
    should_be = array([10, 50, 100])
    self.assert_(ma.allclose(result, should_be, rtol=1e-6), "Error in test_fully_qualified_DDD_SSS_variable")
    # check that the access methods for the variable all return the correct values
    name = VariableName(expr)
    self.assertEqual(name.get_package_name(), 'opus_core', msg="bad value for package")
    self.assertEqual(name.get_dataset_name(), 'tests', msg="bad value for dataset")
    self.assertEqual(name.get_short_name(), 'a_test_squid_variable_42_clam', msg="bad value for shortname")
    self.assertEqual(name.get_alias(), 'a_test_squid_variable_42_clam', msg="bad value for alias")
    self.assertEqual(name.get_autogen_class(), None, msg="bad value for autogen_class")
    # test that the variable can now also be accessed using its short name in an expression
    result2 = dataset.compute_variables(['a_test_squid_variable_42_clam'])
    self.assert_(ma.allclose(result2, should_be, rtol=1e-6), "Error in accessing a_test_squid_variable_42_clam")
def test_unary_functions_fully_qualified_name(self):
    # this tests expressions with unary functions applied to a fully qualified name
    expr = "sqrt(opus_core.tests.a_test_variable)"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='tests',
        table_data={
            "a_dependent_variable": array([1, 5, 10]),
            "id": array([1, 3, 4])
        }
    )
    dataset = Dataset(in_storage=storage, in_table_name='tests', id_name="id", dataset_name="tests")
    result = dataset.compute_variables([expr])
    should_be = array([3.16227766, 7.0710678, 10])
    self.assertEqual(ma.allclose(result, should_be, rtol=1e-3), True,
                     msg="error in test_unary_functions_fully_qualified_name")
    # check that the access methods for the variable all return the correct values
    name = VariableName(expr)
    autogen = name.get_autogen_class()
    self.assert_(issubclass(autogen, Variable), msg="autogen'd class isn't a Variable")
    self.assertEqual(name.get_package_name(), None, msg="bad value for package")
    self.assertEqual(name.get_dataset_name(), 'tests', msg="bad value for dataset")
    self.assertEqual(name.get_short_name(), autogen.__name__, msg="bad value for shortname")
    self.assertEqual(name.get_alias(), autogen.__name__, msg="bad value for alias")
    # make an instance of the class and check the dependencies (since the dependent variables
    # all have fully-qualified names we don't need to associate a dataset with the variable
    # for this test)
    self.assertEqual(autogen().dependencies(), ['opus_core.tests.a_test_variable'],
                     msg="dependencies are incorrect")
def test_fully_qualified_variable(self):
    # this tests an expression consisting of a fully-qualified variable
    expr = "opus_core.test_agent.income_times_2"
    storage = StorageFactory().get_storage("dict_storage")
    storage.write_table(table_name="test_agents",
                        table_data={"income": array([1, 5, 10]), "id": array([1, 3, 4])})
    dataset = Dataset(in_storage=storage, in_table_name="test_agents", id_name="id", dataset_name="test_agent")
    result = dataset.compute_variables([expr])
    should_be = array([2, 10, 20])
    self.assert_(ma.allclose(result, should_be, rtol=1e-6), "Error in test_fully_qualified_variable")
    # check that expr is in the cache of known expressions
    # (normally we shouldn't be accessing this private field, but just this once ...)
    cache = VariableName._cache
    self.assert_(expr in cache, msg="did not find expr in cache")
    # check that the access methods for the variable all return the correct values
    name = VariableName(expr)
    self.assertEqual(name.get_package_name(), "opus_core", msg="bad value for package")
    self.assertEqual(name.get_dataset_name(), "test_agent", msg="bad value for dataset")
    self.assertEqual(name.get_short_name(), "income_times_2", msg="bad value for shortname")
    self.assertEqual(name.get_alias(), "income_times_2", msg="bad value for alias")
    self.assertEqual(name.get_autogen_class(), None, msg="bad value for autogen_class")
    # test that the variable can now also be accessed using its short name in an expression
    result2 = dataset.compute_variables(["income_times_2"])
    self.assert_(ma.allclose(result2, should_be, rtol=1e-6), "Error in accessing income_times_2")
    # check that the cache uses the variable name with whitespace removed
    oldsize = len(cache)
    expr_with_spaces = "opus_core . test_agent. income_times_2 "
    name2 = VariableName(expr_with_spaces)
    newsize = len(cache)
    self.assertEqual(oldsize, newsize, msg="caching error")
    self.assert_(expr_with_spaces not in cache, msg="caching error")
    self.assertEqual(expr_with_spaces, name2.get_expression(), msg="caching error")
    self.assertEqual(name2.get_short_name(), "income_times_2", msg="bad value for shortname")
def get_attribute(self, name):
    """ Return an array of the attribute given by the argument 'name'. """
    if not isinstance(name, VariableName):
        attr_name = VariableName(name)
    else:
        attr_name = name
    alias = attr_name.get_alias()
    dataset_name = attr_name.get_dataset_name()
    if not (alias in self.get_attribute_names()):
        if dataset_name == self.get_dataset(1).dataset_name:
            index = self.get_2d_index_of_dataset1()
            return self.get_dataset(1).get_attribute_by_index(attr_name, index)
        if dataset_name == self.get_dataset(2).dataset_name:
            index = self.get_2d_index()
            return self.get_dataset(2).get_attribute_by_index(attr_name, index)
        if alias in self.get_dataset(1).get_known_attribute_names():
            index = self.get_2d_index_of_dataset1()
            return self.get_dataset(1).get_attribute_by_index(attr_name, index)
        if alias in self.get_dataset(2).get_known_attribute_names():
            index = self.get_2d_index()
            return self.get_dataset(2).get_attribute_by_index(attr_name, index)
        self._raise_error(NameError, "Variable %s not found!" % alias)
    return self.attribute_boxes[alias].get_data()
def _do_flush_dependent_variables_if_required(self):
    try:
        if not SessionConfiguration().get('flush_variables', False):
            return
    except:
        return
    from opus_core.datasets.interaction_dataset import InteractionDataset
    dataset = self.get_dataset()
    dependencies = self.get_current_dependencies()
    my_dataset_name = dataset.get_dataset_name()
    for iattr in range(len(dependencies)):  # iterate over dependent variables
        dep_item = dependencies[iattr][0]
        if isinstance(dep_item, str):
            depvar_name = VariableName(dep_item)
        else:
            depvar_name = dep_item.get_variable_name()  # dep_item should be an instance of AttributeBox
        dataset_name = depvar_name.get_dataset_name()
        if dataset_name == my_dataset_name:
            ds = dataset
        else:
            ds = SessionConfiguration().get_dataset_from_pool(dataset_name)
            #ds = dataset_pool.get_dataset('dataset_name')
        if not isinstance(ds, InteractionDataset):
            short_name = depvar_name.get_alias()
            if short_name not in ds.get_id_name():
                ds.flush_attribute(depvar_name)
def test_alias_attribute(self):
    # this tests an expression consisting of an alias for a primary attribute
    expr = "p = persons"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='tests',
                        table_data={
                            "persons": array([1, 5, 10]),
                            "id": array([1, 3, 4])
                        })
    dataset = Dataset(in_storage=storage, in_table_name='tests', id_name="id", dataset_name="tests")
    result = dataset.compute_variables([expr])
    self.assertEqual(ma.allclose(result, [1, 5, 10], rtol=1e-7), True, msg="error in test_alias_attribute")
    # check that the access methods for the variable all return the correct values
    name = VariableName(expr)
    self.assertEqual(name.get_package_name(), None, msg="bad value for package")
    self.assertEqual(name.get_dataset_name(), None, msg="bad value for dataset")
    self.assert_(name.get_short_name().startswith('autogen'), msg="bad value for shortname")
    self.assertEqual(name.get_alias(), 'p', msg="bad value for alias")
    self.assertNotEqual(name.get_autogen_class(), None, msg="bad value for autogen_class")
def test_constants(self):
    # test an expression involving two dataset names, one of which is *_constant
    expr = "test_agent.age<=opus_constant.young_age"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='test_agents',
        table_data={
            "age": array([30, 20, 60, 80]),
            "id": array([1, 3, 4, 10])
        }
    )
    storage.write_table(
        table_name='opus_constants',
        table_data={
            "young_age": array([35]),
            "opus_constant_id": array([1])
        }
    )
    dataset_pool = DatasetPool(storage=storage)
    # Test that the dataset name is correct for expr. It should be test_agent -- opus_constant
    # just holds constants, and is ignored as far as finding the dataset name for the expression.
    name = VariableName(expr)
    autogen = name.get_autogen_class()
    self.assertEqual(name.get_package_name(), None)
    self.assertEqual(name.get_dataset_name(), 'test_agent')
    # make an instance of the class and check the dependencies (it shouldn't depend on opus_constant)
    self.assertEqual(autogen().dependencies(), ['test_agent.age'])
    dataset = Dataset(in_storage=storage, in_table_name='test_agents', id_name="id", dataset_name="test_agent")
    result = dataset.compute_variables([expr], dataset_pool=dataset_pool)
    should_be = array([True, True, False, False])
    self.assertEqual(ma.allequal(result, should_be), True)
def test_interaction_set_component(self):
    # test a fully-qualified variable that applies to a component of an interaction set
    expr = "opus_core.test_agent.income_times_2"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='test_agents',
        table_data={'id': array([1, 2, 3]), 'income': array([1, 20, 500])}
    )
    storage.write_table(
        table_name='test_locations',
        table_data={'id': array([1, 2]), 'cost': array([1000, 2000])}
    )
    dataset_pool = DatasetPool(package_order=['opus_core'], storage=storage)
    test_agent_x_test_location = dataset_pool.get_dataset('test_agent_x_test_location')
    result = test_agent_x_test_location.compute_variables(expr, dataset_pool=dataset_pool)
    should_be = array([[2, 2], [40, 40], [1000, 1000]])
    self.assert_(ma.allclose(result, should_be, rtol=1e-6), msg="Error in " + expr)
    # test that the interaction set now has this as an attribute
    result2 = test_agent_x_test_location.get_attribute('income_times_2')
    self.assert_(ma.allclose(result2, should_be, rtol=1e-6), msg="Error in " + expr)
    # test that the variable can now also be accessed using its short name
    result3 = test_agent_x_test_location.compute_variables(['income_times_2'])
    self.assert_(ma.allclose(result3, should_be, rtol=1e-6), msg="Error in " + expr)
    # even though we're using this with an interaction set, the dataset name for expr
    # should be the name of the component set (since that's the only one mentioned in expr)
    name = VariableName(expr)
    self.assertEqual(name.get_dataset_name(), 'test_agent', msg="bad value for dataset")
def test_alias_fully_qualified_variable(self):
    expr = "x = opus_core.tests.a_test_variable"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='tests',
                        table_data={
                            "a_dependent_variable": array([1, 5, 10]),
                            "id": array([1, 3, 4])
                        })
    dataset = Dataset(in_storage=storage, in_table_name='tests', id_name="id", dataset_name="tests")
    result = dataset.compute_variables([expr])
    should_be = array([10, 50, 100])
    self.assert_(ma.allclose(result, should_be, rtol=1e-6), "Error in test_alias_fully_qualified_variable")
    # check that the new var has x as an alias
    v = VariableName(expr)
    self.assertEqual(v.get_package_name(), None, msg="bad value for package_name")
    self.assertEqual(v.get_dataset_name(), 'tests', msg="bad value for dataset_name")
    self.assert_(v.get_short_name().startswith('autogen'), msg="bad value for shortname")
    self.assertEqual(v.get_alias(), 'x', msg="bad value for alias")
    self.assertNotEqual(v.get_autogen_class(), None, msg="bad value for autogen_class")
    # check that the alias has the correct value
    result2 = dataset.compute_variables(['x'])
    self.assert_(ma.allclose(result2, should_be, rtol=1e-6), "Error in accessing a_test_variable")
def test_multiply(self):
    expr = 'test_agent.income*test_location.cost'
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name='test_agents',
        table_data={'id': array([1, 2, 3]), 'income': array([1, 20, 500])}
    )
    storage.write_table(
        table_name='test_locations',
        table_data={'id': array([1, 2]), 'cost': array([1000, 2000])}
    )
    dataset_pool = DatasetPool(package_order=['opus_core'], storage=storage)
    test_agent_x_test_location = dataset_pool.get_dataset('test_agent_x_test_location')
    result = test_agent_x_test_location.compute_variables(expr, dataset_pool=dataset_pool)
    should_be = array([[1000, 2000], [20000, 40000], [500000, 1000000]])
    self.assert_(ma.allclose(result, should_be, rtol=1e-6), msg="Error in " + expr)
    name = VariableName(expr)
    # since the expression involves both test_agent and test_location, the dataset name should be None
    # and the interaction set names should be (test_agent, test_location) or (test_location, test_agent)
    self.assertEqual(name.get_dataset_name(), None)
    names = name.get_interaction_set_names()
    self.assertEqual(len(names), 2)
    self.assert_('test_agent' in names)
    self.assert_('test_location' in names)
def compute_expression(self, attribute_name):
    """Compute any expression and return its values."""
    var_name = VariableName(attribute_name)
    dataset_name = var_name.get_dataset_name()
    ds = self.get_dataset(dataset_name)
    return ds.compute_variables([var_name], dataset_pool=self.get_dataset_pool())
def _compute_if_needed(self, name, dataset_pool, resources=None, quiet=False, version=None):
    """ Compute variable given by the argument 'name' only if this variable
    has not been computed before. Check first if this variable belongs to
    dataset1 or dataset2. dataset_pool holds available datasets.
    """
    if not isinstance(name, VariableName):
        variable_name = VariableName(name)
    else:
        variable_name = name
    short_name = variable_name.get_alias()
    dataset_name = variable_name.get_dataset_name()
    if dataset_name == self.get_dataset_name():
        new_version = UrbansimDataset._compute_if_needed(self, variable_name, dataset_pool,
                                                         resources, quiet=quiet, version=version)
    else:
        if dataset_name == self.dataset1.get_dataset_name():
            owner_dataset = self.dataset1
            # index = self.get_2d_index_of_dataset1()
        elif dataset_name == self.dataset2.get_dataset_name():
            owner_dataset = self.dataset2
            # index = self.get_2d_index()
        else:
            self._raise_error(StandardError,
                              "Cannot find variable '%s'\nin either dataset or in the interaction set." %
                              variable_name.get_expression())
        owner_dataset.compute_variables([variable_name], dataset_pool, resources=resources, quiet=True)
        new_version = self.compute_variables_return_versions_and_final_value(
            "%s = %s.disaggregate(%s.%s)" % (short_name, self.get_dataset_name(),
                                             owner_dataset.get_dataset_name(), short_name),
            dataset_pool=dataset_pool, resources=resources, quiet=quiet)[0]
    return new_version
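# For illustration (hypothetical dataset/variable names): when 'name' belongs
# to one of the component datasets, the fallback above computes it on that
# component and then builds a generated expression of the form
#
#   "<alias> = <interaction_set_name>.disaggregate(<owner_dataset>.<alias>)"
#
# e.g. "avg_income = household_x_zone.disaggregate(zone.avg_income)", which is
# what compute_variables_return_versions_and_final_value receives.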
def _compute_if_needed(self, name, dataset_pool, resources=None, quiet=False, version=None):
    """ Compute variable given by the argument 'name' only if this variable
    has not been computed before. Check first if this variable belongs to
    dataset1 or dataset2. dataset_pool holds available datasets.
    """
    if not isinstance(name, VariableName):
        variable_name = VariableName(name)
    else:
        variable_name = name
    short_name = variable_name.get_alias()
    if (short_name in self.get_attribute_names()) and \
            (self.are_dependent_variables_up_to_date(variable_name, version=version)):
        return version  # nothing to be done
    dataset_name = variable_name.get_dataset_name()
    if dataset_name == self.get_dataset_name():
        new_version = self._compute_one_variable(variable_name, dataset_pool, resources)
    else:
        owner_dataset, index = self.get_owner_dataset_and_index(dataset_name)
        if owner_dataset is None:
            self._raise_error(StandardError,
                              "Cannot find variable '%s'\nin either dataset or in the interaction set." %
                              variable_name.get_expression())
        owner_dataset.compute_variables([variable_name], dataset_pool, resources=resources, quiet=True)
        new_version = self.add_attribute(data=owner_dataset.get_attribute_by_index(variable_name, index),
                                         name=variable_name,
                                         metadata=AttributeType.COMPUTED)
        attribute_box = owner_dataset._get_attribute_box(variable_name)
        variable = attribute_box.get_variable_instance()
        my_attribute_box = self._get_attribute_box(variable_name)
        my_attribute_box.set_variable_instance(variable)
    return new_version
def compute_expression(self, attribute_name):
    """Compute any expression and return its values."""
    var_name = VariableName(attribute_name)
    dataset_name = var_name.get_dataset_name()
    ds = self.get_dataset(dataset_name)
    return ds.compute_variables(
        [var_name], dataset_pool=self.model_system.run_year_namespace["dataset_pool"])
def prepare_for_run(self, expressions_to_compute=None, dataset_pool=None):
    if expressions_to_compute is not None:
        if dataset_pool is None:
            dataset_pool = SessionConfiguration().get_dataset_pool()
        for expression in expressions_to_compute:
            vn = VariableName(expression)
            dataset_name = vn.get_dataset_name()
            dataset = dataset_pool.get_dataset(dataset_name)
            dataset.compute_variables(expression)
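# Hypothetical call (expression names invented): each expression is parsed,
# routed to the dataset it names, and computed there before the model runs.
#
#   model.prepare_for_run(expressions_to_compute=[
#       "zone.aggregate(urbansim.gridcell.population)",
#       "gridcell.number_of_agents(household)",
#   ])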
def compute_m(self, year, quantity_of_interest):
    variable_name = VariableName(quantity_of_interest)
    dataset_name = variable_name.get_dataset_name()
    for i in range(self.number_of_runs):
        ds = self._compute_variable_for_one_run(i, variable_name, dataset_name, year,
                                                self.observed_data.get_quantity_object(quantity_of_interest))
        if i == 0:  # first run
            self.m = zeros((ds.size(), self.number_of_runs), dtype=float32)
            self.m_ids = ds.get_id_attribute()
        self.m[:, i] = try_transformation(ds.get_attribute(variable_name),
                                          self.transformation_pair_for_prediction[0])
def run(self, year, cache_directory=None):
    """The class is initialized with the appropriate configuration info from the
    travel_model_configuration part of this config, and then copies the specified
    UrbanSim data into files for daysim to read.
    The variables/expressions to export are defined in the node
    travel_model_configuration/urbansim_to_tm_variable_mapping of the configuration file.
    """
    if cache_directory is None:
        cache_directory = self.config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(cache_directory)
    simulation_state.set_current_time(year)
    attribute_cache = AttributeCache()
    sc = SessionConfiguration(new_instance=True,
                              package_order=self.config['dataset_pool_configuration'].package_order,
                              in_storage=attribute_cache)
    dataset_pool = sc.get_dataset_pool()
    tm_config = self.config['travel_model_configuration']
    data_to_export = tm_config['urbansim_to_tm_variable_mapping']
    table_names = data_to_export.keys()
    variable_names = {}
    datasets = {}
    filenames = {}
    in_table_names = {}
    for table_name in table_names:
        filter = data_to_export[table_name].get('__filter__', None)
        if filter is not None:
            del data_to_export[table_name]['__filter__']
        out_table_name = data_to_export[table_name].get('__out_table_name__', None)
        if out_table_name is not None:
            del data_to_export[table_name]['__out_table_name__']
        else:
            out_table_name = table_name
        variables_to_export = map(lambda alias: "%s = %s" % (alias, data_to_export[table_name][alias]),
                                  data_to_export[table_name].keys())
        dataset_name = None
        for var in variables_to_export:
            var_name = VariableName(var)
            if dataset_name is None:
                dataset_name = var_name.get_dataset_name()
                ds = dataset_pool.get_dataset(dataset_name)
                datasets[dataset_name] = ds
                filenames[dataset_name] = out_table_name
                in_table_names[dataset_name] = table_name
            if dataset_name not in variable_names.keys():
                variable_names[dataset_name] = []
            variable_names[dataset_name].append(var_name.get_alias())
            ds.compute_variables([var_name], dataset_pool=dataset_pool)
        if filter is not None:
            filter_idx = where(ds.compute_variables(["__filter__ = %s" % filter],
                                                    dataset_pool=dataset_pool) > 0)[0]
            ds = DatasetSubset(ds, index=filter_idx)
            datasets[dataset_name] = ds
    return self._call_input_file_writer(year, datasets, in_table_names, filenames, variable_names, dataset_pool)
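# A hypothetical urbansim_to_tm_variable_mapping node, consistent with how the
# loop above consumes it (all names invented): each key is an UrbanSim table,
# each value maps output aliases to expressions, with the optional reserved
# keys '__filter__' and '__out_table_name__' stripped before export.
#
#   'urbansim_to_tm_variable_mapping': {
#       'households': {
#           '__filter__': 'household.persons > 0',
#           '__out_table_name__': 'daysim_households',
#           'hhsize': 'household.persons',
#           'income': 'household.income',
#       },
#   }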
class ln_sampling_probability_for_bias_correction_mnl(Variable):
    """Abstract variable to be used for correcting for sampling bias when sampling
    alternatives. It is assumed to be an interaction variable. The init function
    gets the name of the attribute that is used for weighting alternatives in the
    model. It doesn't need to be normalized; that is done within the function.
    """
    def __init__(self, weights_attribute):
        self.weights_attribute_name = weights_attribute
        Variable.__init__(self)

    def dependencies_to_add(self, dataset_name, package="urbansim"):
        """Will be added to the dependencies from the compute method, because before
        that we don't know the dataset name."""
        self.weights_attribute = VariableName("%s.%s.%s" % (package, dataset_name,
                                                            self.weights_attribute_name))
        return [
            self.weights_attribute.get_expression(),
            "_normalized_weights_%s = %s/float(sum(%s))" % (
                self.weights_attribute_name,
                self.weights_attribute.get_expression(),
                self.weights_attribute.get_expression()),
            "_log_weights_%s = ln(%s._normalized_weights_%s)" % (
                self.weights_attribute_name,
                self.weights_attribute.get_dataset_name(),
                self.weights_attribute_name),
            "_log_1_minus_weights_%s = ln(1 - %s._normalized_weights_%s)" % (
                self.weights_attribute_name,
                self.weights_attribute.get_dataset_name(),
                self.weights_attribute_name)
        ]

    def compute(self, dataset_pool):
        ds = self.get_dataset()  # interaction dataset
        self.add_and_solve_dependencies(
            self.dependencies_to_add(ds.get_dataset(2).get_dataset_name()), dataset_pool)
        log_1_minus_weights = ds.get_dataset(2).get_attribute(
            "_log_1_minus_weights_%s" % self.weights_attribute_name)
        result = log_1_minus_weights.sum() - \
            ds.get_attribute("_log_1_minus_weights_%s" % self.weights_attribute_name).sum(axis=1).reshape((ds.get_reduced_n(), 1)) - \
            ds.get_attribute("_log_weights_%s" % self.weights_attribute_name) + \
            ds.get_attribute("_log_weights_%s" % self.weights_attribute_name).sum(axis=1).reshape((ds.get_reduced_n(), 1))
        return result - result.max()  # shift the values so the maximum is zero
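# Read directly off compute() above (a description of the code, not a
# derivation taken from the source): with w the normalized weights of the
# choice set, for agent i and sampled alternative j,
#
#   result[i, j] = sum over all alternatives of ln(1 - w)
#                  - sum over i's sampled alternatives of ln(1 - w)
#                  - ln(w_j)
#                  + sum over i's sampled alternatives of ln(w),
#
# and the whole matrix is then shifted so its maximum is zero before being
# added to the MNL utilities as a sampling-bias correction term.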
def compute_m(self, year, quantity_of_interest, values=None, ids=None):
    if (values is not None) and (ids is not None):
        self._get_m_from_values(values, ids)
        return
    variable_name = VariableName(quantity_of_interest)
    dataset_name = variable_name.get_dataset_name()
    for i in range(self.cache_set.size):
        ds = self._compute_variable_for_one_run(i, variable_name, dataset_name, year)
        if i == 0:  # first run
            m = zeros((ds.size(), self.cache_set.size), dtype=float32)
            self.m_ids = ds.get_id_attribute()
        m[:, i] = try_transformation(ds.get_attribute(variable_name),
                                     self.transformation_pair_for_prediction[0])
    self.m = resize(average(m, axis=1), (m.shape[0], 1))
def test_dataset_qualified_attribute(self):
    expr = "tests.persons"
    storage = StorageFactory().get_storage("dict_storage")
    storage.write_table(table_name="tests",
                        table_data={"persons": array([1, 5, 10]), "id": array([1, 3, 4])})
    dataset = Dataset(in_storage=storage, in_table_name="tests", id_name="id", dataset_name="tests")
    result = dataset.compute_variables([expr])
    self.assertEqual(ma.allclose(result, [1, 5, 10], rtol=1e-7), True,
                     msg="error in test_dataset_qualified_attribute")
    # check that the access methods for the variable all return the correct values
    name = VariableName(expr)
    self.assertEqual(name.get_package_name(), None, msg="bad value for package")
    self.assertEqual(name.get_dataset_name(), "tests", msg="bad value for dataset")
    self.assertEqual(name.get_short_name(), "persons", msg="bad value for shortname")
    self.assertEqual(name.get_alias(), "persons", msg="bad value for alias")
    self.assertEqual(name.get_autogen_class(), None, msg="bad value for autogen_class")
def compute_values_from_multiple_runs(self, year, quantity_of_interest, dtype='float32', dataset_arguments={}):
    """'quantity_of_interest' is a fully-qualified variable name.
    Return a matrix of size (dataset.size x number_of_runs) with values of the
    variable for each dataset member and run. The dataset is the one to which
    quantity_of_interest belongs.
    """
    variable_name = VariableName(quantity_of_interest)
    dataset_name = variable_name.get_dataset_name()
    for i in range(self.cache_set.size):
        ds = self._compute_variable_for_one_run(i, variable_name, dataset_name, year,
                                                dataset_arguments=dataset_arguments)
        if i == 0:  # first run
            result = zeros((ds.size(), self.cache_set.size), dtype=dtype)
        result[:, i] = ds.get_attribute(variable_name)
    return result
def _solve_dependencies(self, dataset_pool):
    dataset = self.get_dataset()
    my_dataset_name = dataset.get_dataset_name()
    dependencies_list = self.get_current_dependencies()
    for i in range(len(dependencies_list)):  # compute dependent variables
        dep_item = dependencies_list[i][0]
        if isinstance(dep_item, str):
            depvar_name = VariableName(dep_item)
        else:
            depvar_name = dep_item.get_variable_name()  # dep_item should be an instance of AttributeBox
        dataset_name = depvar_name.get_dataset_name()
        version = dependencies_list[i][1]
        if dataset_name == my_dataset_name:
            ds = dataset
        else:
            ds = dataset_pool.get_dataset(dataset_name)
        (new_versions, value) = ds.compute_variables_return_versions_and_final_value(
            [(depvar_name, version)], dataset_pool)
        self.dependencies_list[i] = (ds._get_attribute_box(depvar_name), new_versions[0])
def _update_variable_from_fields(self):
    ''' update the variable with values from the gui widgets '''
    self.variable['name'] = str(self.leVarName.text())
    self.variable['source'] = str(self.cboVarType.currentText())
    self.variable['definition'] = str(self.le_var_def.document().toPlainText())
    try:
        v = VariableName(self.variable['definition'])
        dataset_name = v.get_dataset_name()
        interaction_set_names = v.get_interaction_set_names()
    except (SyntaxError, ValueError):
        MessageBox.error(mainwindow=self,
                         text='parse error for variable',
                         detailed_text='setting dataset name for this variable to <unknown>')
        dataset_name = '<unknown>'
        interaction_set_names = None
    if dataset_name is None and interaction_set_names is not None:
        # It's an interaction set. Look up possible names in available_datasets.
        names = get_available_dataset_names(self.validator.project)
        n1 = interaction_set_names[0] + '_x_' + interaction_set_names[1]
        if n1 in names:
            dataset_name = n1
        else:
            n2 = interaction_set_names[1] + '_x_' + interaction_set_names[0]
            if n2 in names:
                dataset_name = n2
            else:
                MessageBox.error(mainwindow=self,
                                 text='unable to find an interaction set in available_datasets for this variable',
                                 detailed_text="tried %s and %s \nbut couldn't find either name in available_datasets \nsetting dataset_name to <unknown>" % (n1, n2))
                dataset_name = '<unknown>'
    self.variable['dataset'] = dataset_name
    if self.rbUseModel.isChecked():
        self.variable['use'] = 'model variable'
    elif self.rbUseIndicator.isChecked():
        self.variable['use'] = 'indicator'
    else:
        self.variable['use'] = 'both'
def compute_expression(self, attribute_name, allow_missing=False):
    """Compute any expression and return its values.
    If allow_missing is True, the code does not break if an attribute cannot be
    computed or a dataset is missing.
    """
    var_name = VariableName(attribute_name)
    dataset_name = var_name.get_dataset_name()
    try:
        ds = self.dataset_pool.get_dataset(dataset_name)
    except FileNotFoundError:
        if allow_missing:
            return None
        raise
    try:
        return ds.compute_variables([var_name], dataset_pool=self.dataset_pool)
    except (LookupError, FileNotFoundError, StandardError):
        if allow_missing:
            return np.zeros(ds.size(), dtype="bool8")
        raise
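# Hypothetical call pattern for the variant above (dataset and attribute names
# invented): with allow_missing=True a missing dataset yields None and a failed
# computation yields an all-False array sized to the dataset, so callers can
# treat the result as "no members selected" instead of aborting.
#
#   flags = obj.compute_expression("parcel.is_redevelopable", allow_missing=True)
#   if flags is None:
#       pass  # the parcel dataset was not in the pool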
def add_prefix_to_variable_names(self, variable_names, dataset, variable_package, resources):
    """Add a prefix of 'package.dataset_name.' to variable_names from resources."""
    if resources is None:
        return
    if not isinstance(variable_names, list):
        variable_names = [variable_names]
    for variable_name in variable_names:
        variable_string = resources.get(variable_name, None)
        if variable_string is not None:
            variable_string_name = VariableName(variable_string)
            if (variable_string_name.get_dataset_name() is None) and \
                    (variable_string_name.get_autogen_class() is None):
                add_string = ""
                if variable_string_name.get_package_name() is None:
                    add_string = "%s." % variable_package
                add_string = add_string + dataset.get_dataset_name() + "."
                resources.merge({variable_name: add_string + variable_string})
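# Worked example of the prefixing rule above (hypothetical values): with
# variable_package='urbansim' and a dataset named 'gridcell',
#
#   resources['weight'] == 'population'
#
# becomes 'urbansim.gridcell.population', while an entry that already names a
# dataset, or that parses to an autogenerated expression class, is left alone.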
def get_var(self, name):
    # creates a fake dataset, required for variable resolution
    def create_fake_dataset(dataset_name):
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(
            table_name='fake_dataset',
            table_data={
                'id': array([], dtype='int32')
            }
        )
        dataset = Dataset(in_storage=storage, in_table_name='fake_dataset',
                          dataset_name=dataset_name, id_name="id")
        return dataset

    var = VariableName(name)
    dataset = var.get_dataset_name()
    try:
        return self.factory.get_variable(var, create_fake_dataset(dataset), quiet=True)
    except LookupError:
        #print "LOOKUP ERROR: " + name
        return None
def get_all_dependencies(self):
    """Return all variables and attributes needed to compute this variable.
    This is returned as a list of tuples where the first element is either an
    AttributeBox or a VariableName of the dependent variable, and the second
    element is the version for which this variable was computed.
    """
    def create_fake_dataset(dataset_name):
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(
            table_name='fake_dataset',
            table_data={
                'id': array([], dtype='int32')
            }
        )
        dataset = Dataset(in_storage=storage, in_table_name='fake_dataset',
                          dataset_name=dataset_name, id_name="id")
        return dataset

    result_others = []
    dependencies_list = self.get_current_dependencies()
    for i in range(len(dependencies_list)):
        dep_item = dependencies_list[i][0]
        version = dependencies_list[i][1]
        isprimary = 0
        if isinstance(dep_item, str):
            depvar_name = VariableName(dep_item)
            dataset_name = depvar_name.get_dataset_name()
            var = VariableFactory().get_variable(depvar_name, create_fake_dataset(dataset_name),
                                                 quiet=True)
            result_others = result_others + [(depvar_name, version)]
        else:  # dep_item should be an instance of AttributeBox
            var = dep_item.get_variable_instance()
            result_others = result_others + [(dep_item, version)]
            isprimary = dep_item.is_primary()
        if (var is not None) and (not isprimary):
            res = var.get_all_dependencies()
            result_others = result_others + res
    return result_others
def _analyze_aggregation_method_call(self, receiver, method, args):
    same, vars = match(SUBPATTERN_AGGREGATION, args)
    if not same:
        raise ValueError, "syntax error for aggregation method call"
    arg_dict = self._get_arguments(('arg1', 'arg2', 'arg3'),
                                   ('aggr_var', 'intermediates', 'function'),
                                   vars)
    if 'aggr_var' not in arg_dict:
        raise ValueError, "syntax error for aggregation method call (problem with argument for variable being aggregated)"
    same1, vars1 = match(SUBPATTERN_FULLY_QUALIFIED_VARIABLE_ARG, arg_dict['aggr_var'])
    if same1:
        # the aggregated variable is a fully-qualified name
        pkg = vars1['package']
        dataset = vars1['dataset']
        attr = vars1['shortname']
    else:
        same2, vars2 = match(SUBPATTERN_DATASET_QUALIFIED_VARIABLE_ARG, arg_dict['aggr_var'])
        if same2:
            # the aggregated variable is a dataset-qualified name
            pkg = None
            dataset = vars2['dataset']
            attr = vars2['shortname']
        else:
            # The thing being aggregated is an expression. Generate a new autogen variable
            # for that expression, and use the autogen variable in the aggregation call.
            subexpr = arg_dict['aggr_var']
            newvar = VariableName(parsetree_to_string(subexpr))
            pkg = None
            dataset = newvar.get_dataset_name()
            if dataset is None:
                raise ValueError, "syntax error for aggregation method call - could not determine dataset for variable being aggregated"
            attr = newvar.get_short_name()
    if 'intermediates' in arg_dict:
        # make sure that it really is a list
        s, v = match(SUBPATTERN_LIST_ARG, arg_dict['intermediates'])
        if not s:
            raise ValueError, "syntax error for aggregation method call (list of intermediate datasets not a list?)"
        intermediates = tuple(self._extract_names(arg_dict['intermediates']))
    else:
        intermediates = ()
    if 'function' in arg_dict:
        # bind fcn to a string that is the name of the function, or to the string "None"
        s, v = match(SUBPATTERN_NAME_ARG, arg_dict['function'])
        if not s:
            raise ValueError, "syntax error for aggregation method call (problem with the function argument in the call)"
        fcn = v['name']
    else:
        fcn = None
    self._aggregation_calls.add((receiver, method, pkg, dataset, attr, intermediates, fcn))
    quoted_intermediates = "" if len(intermediates) == 0 else quote(intermediates[0])
    for n in intermediates[1:]:
        quoted_intermediates = quoted_intermediates + ', ' + quote(n)
    # 'call' is a string representing the new aggregation call. Parse it, extract the args,
    # and then add a replacement to parsetree_replacements for the old args. We want to
    # replace just the args and not the entire call to aggregate, since the way Python
    # represents parsetrees the whole tree may include astype and exponentiation calls,
    # and it's simpler to just replace the args part.
    call = "%s.%s(%s, %s, %s, [%s], %s)" % (receiver, method, quote(pkg), quote(dataset),
                                            quote(attr), quoted_intermediates, quote(fcn))
    (newtree, _) = self._parse_expr(call)
    s, v = match(FULL_EXPRESSION_METHOD_CALL, newtree)
    if not s:
        raise StandardError, 'internal error - problem generating new aggregation expression'
    self._parsetree_replacements[args] = v['args']
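# Illustration of the rewrite performed above (hypothetical expression; the
# exact output of quote() is assumed): an aggregation such as
#
#   zone.aggregate(gridcell.population, function=sum)
#
# has its argument list regenerated into roughly the canonical five-argument form
#
#   zone.aggregate(None, 'gridcell', 'population', [], 'sum')
#
# (package, dataset, attribute, intermediates, function), and only the args
# subtree of the original parse tree is replaced.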
def run(self, dataset1, dataset2, index1=None, index2=None, sample_size=10,
        weight=None, include_chosen_choice=False, with_replacement=False,
        resources=None, dataset_pool=None):
    """This method samples 'sample_size' (a scalar) alternatives from dataset2
    for the agent set given by dataset1.
    If index1 is not None, it samples alternatives only for agents with indices in index1;
    if index2 is not None, it samples alternatives only from indices in index2.
    'sample_size' specifies the number of alternatives to be sampled for each agent.
    'weight', used as the sampling weight, is either an attribute name of dataset2,
    a 1d array of the same length as index2, or a 2d array of shape (index1.size, index2.size).
    Also refer to the documentation of interaction_dataset."""
    if dataset_pool is None:
        try:
            sc = SessionConfiguration()
            dataset_pool = sc.get_dataset_pool()
        except:
            dataset_pool = DatasetPool()
    local_resources = Resources(resources)
    local_resources.merge_if_not_None({
        "dataset1": dataset1,
        "dataset2": dataset2,
        "index1": index1,
        "index2": index2,
        "sample_size": sample_size,
        "weight": weight,
        "with_replacement": with_replacement,
        "include_chosen_choice": include_chosen_choice
    })
    local_resources.check_obligatory_keys(['dataset1', 'dataset2', 'sample_size'])
    agent = local_resources["dataset1"]
    index1 = local_resources.get("index1", None)
    if index1 is None:
        index1 = arange(agent.size())
    choice = local_resources["dataset2"]
    index2 = local_resources.get("index2", None)
    if index2 is None:
        index2 = arange(choice.size())
    if index1.size == 0 or index2.size == 0:
        err_msg = "either choice size or agent size is zero, return None"
        logger.log_warning(err_msg)
        return None
    include_chosen_choice = local_resources.get("include_chosen_choice", False)
    J = local_resources["sample_size"]
    if include_chosen_choice:
        J = J - 1
    with_replacement = local_resources.get("with_replacement")
    weight = local_resources.get("weight", None)
    if isinstance(weight, str):
        if weight in choice.get_known_attribute_names():
            weight = choice.get_attribute(weight)
            rank_of_weight = 1
        else:
            varname = VariableName(weight)
            if varname.get_dataset_name() == choice.get_dataset_name():
                weight = choice.compute_variables(weight, dataset_pool=dataset_pool)
                rank_of_weight = 1
            elif varname.get_interaction_set_names() is not None:
                ## weights can be an interaction variable
                interaction_dataset = InteractionDataset(local_resources)
                weight = interaction_dataset.compute_variables(weight, dataset_pool=dataset_pool)
                rank_of_weight = 2
                assert (len(weight.shape) >= rank_of_weight)
            else:
                err_msg = ("weight is neither a known attribute name "
                           "nor a simple variable from the choice dataset "
                           "nor an interaction variable: '%s'" % weight)
                logger.log_error(err_msg)
                raise ValueError, err_msg
    elif isinstance(weight, ndarray):
        rank_of_weight = weight.ndim
    elif not weight:  ## weight is None or an empty string
        weight = ones(index2.size)
        rank_of_weight = 1
    else:
        err_msg = "unknown weight type"
        logger.log_error(err_msg)
        raise TypeError, err_msg
    if (weight.size != index2.size) and (weight.shape[rank_of_weight - 1] != index2.size):
        if weight.shape[rank_of_weight - 1] == choice.size():
            if rank_of_weight == 1:
                weight = take(weight, index2)
            if rank_of_weight == 2:
                weight = take(weight, index2, axis=1)
        else:
            err_msg = "weight array size doesn't match the size of dataset2 or its index"
            logger.log_error(err_msg)
            raise ValueError, err_msg
    prob = normalize(weight)
    #chosen_choice = ones(index1.size) * UNPLACED_ID
    chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
    #index_of_placed_agent = where(greater(chosen_choice_id, UNPLACED_ID))[0]
    chosen_choice_index = choice.try_get_id_index(chosen_choice_id,
                                                  return_value_if_not_found=UNPLACED_ID)
    chosen_choice_index_to_index2 = lookup(chosen_choice_index, index2,
                                           index_if_not_found=UNPLACED_ID)
    if rank_of_weight == 1:
        # if the weight array is 1d, all agents share the same weights over choices
        replace = with_replacement  # sampling with/without replacement, per the argument
        non_zero_counts = nonzerocounts(weight)
        if non_zero_counts < J:
            logger.log_warning("weight array doesn't have enough non-zero counts; "
                               "sampling with replacement")
            replace = True
        if non_zero_counts > 0:
            sampled_index = prob2dsample(index2,
                                         sample_size=(index1.size, J),
                                         prob_array=prob,
                                         exclude_index=chosen_choice_index_to_index2,
                                         replace=replace,
                                         return_index=True)
        else:
            # all alternatives have a zero weight
            sampled_index = zeros((index1.size, 0), dtype=DTYPE)
        #return index2[sampled_index]
    if rank_of_weight == 2:
        sampled_index = zeros((index1.size, J), dtype=DTYPE) - 1
        for i in range(index1.size):
            replace = with_replacement  # sampling with/without replacement, per the argument
            i_prob = prob[i, :]
            if nonzerocounts(i_prob) < J:
                logger.log_warning("weight array doesn't have enough non-zero counts; "
                                   "sampling with replacement")
                replace = True
            # exclude_index passed to probsample_noreplace needs to be indexed to index2
            sampled_index[i, :] = probsample_noreplace(index2,
                                                       sample_size=J,
                                                       prob_array=i_prob,
                                                       exclude_index=chosen_choice_index_to_index2[i],
                                                       return_index=True)
    sampling_prob = take(prob, sampled_index)
    sampled_index_within_prob = sampled_index.copy()
    sampled_index = index2[sampled_index]
    is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
    #chosen_choice = -1 * ones(chosen_choice_index.size, dtype="int32")
    if include_chosen_choice:
        sampled_index = column_stack((chosen_choice_index[:, newaxis], sampled_index))
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1
        #chosen_choice[where(is_chosen_choice)[0]] = where(is_chosen_choice)[1]
        ## this is necessary because prob is indexed to index2,
        ## not to the choice set (as chosen_choice_index is)
        sampling_prob_for_chosen_choices = take(prob, chosen_choice_index_to_index2[:, newaxis])
        ## if the chosen choice equals unplaced_id, then its sampling prob is 0
        sampling_prob_for_chosen_choices[where(chosen_choice_index == UNPLACED_ID)[0], ] = 0.0
        sampling_prob = column_stack([sampling_prob_for_chosen_choices, sampling_prob])
    interaction_dataset = self.create_interaction_dataset(dataset1, dataset2, index1, sampled_index)
    interaction_dataset.add_attribute(sampling_prob, '__sampling_probability')
    interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')
    if local_resources.get("include_mnl_bias_correction_term", False):
        if include_chosen_choice:
            sampled_index_within_prob = column_stack(
                (chosen_choice_index_to_index2[:, newaxis], sampled_index_within_prob))
        interaction_dataset.add_mnl_bias_correction_term(prob, sampled_index_within_prob)
    ## to get the older returns
    #sampled_index = interaction_dataset.get_2d_index()
    #chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
    #where_chosen = where(interaction_dataset.get_attribute("chosen_choice"))
    #chosen_choices[where_chosen[0]] = where_chosen[1]
    #return (sampled_index, chosen_choice)
    return interaction_dataset
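# Hedged usage sketch, not from the original source: how run() might be invoked.
# 'sampler' stands in for an instance of the class owning run() (the class
# statement is not shown here); 'households' and 'gridcells' are hypothetical
# agent and choice datasets, and 'vacant_residential_units' is a hypothetical
# attribute of the choice dataset used as the sampling weight.
#
#   sampled = sampler.run(households, gridcells,
#                         sample_size=10,
#                         weight='vacant_residential_units',
#                         include_chosen_choice=True)
#   # run() returns an interaction dataset carrying the two attributes added above:
#   probs = sampled.get_attribute('__sampling_probability')
#   chosen = sampled.get_attribute('chosen_choice')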
class ObservedDataOneQuantity:
    """Class for storing information about one quantity measure.
    It is to be grouped in an object of class ObservedData."""

    # pairs of inverse transformations
    transformation_pairs = {"sqrt": "**2", "log": "exp", "exp": "log", "**2": "sqrt"}

    def __init__(self, variable_name, observed_data, filename=None, transformation=None,
                 inverse_transformation=None, filter=None, match=False,
                 dependent_datasets={}, **kwargs):
        """'variable_name' is a quantity about which we have data available.
        'observed_data' is of type ObservedData; it is the grouping parent.
        'filename' is the name of the file where the data is stored.
        It can be None if observed_data.directory is a cache.
        'transformation' is an operation to be performed on the data (e.g. sqrt, log);
        'inverse_transformation' is the inverse function of 'transformation'.
        If it is not given, it is determined automatically.
        'filter' is a variable that will be applied to both the observed and the simulated data.
        'match' (logical) determines if the dataset should be matched (by ids) with the
        simulated dataset. Elements that don't match are eliminated from the simulated dataset.
        'dependent_datasets' (if any) should be a dictionary of
        dataset_name: {'filename': filename, 'match': True|False, **kwargs}.
        They will be added to the dataset_pool.
        Remaining arguments are passed into DatasetFactory, thus they can contain
        information about how to create the corresponding dataset.
        """
        self.variable_name = VariableName(variable_name)
        self.dataset_name = self.variable_name.get_dataset_name()
        dataset_pool = observed_data.get_dataset_pool()
        self.matching_datasets = {}
        if dataset_pool is None:
            kwargs.update({'in_storage': observed_data.get_storage(),
                           'in_table_name': filename})
            try:
                self.dataset = DatasetFactory().search_for_dataset(
                    self.dataset_name, observed_data.get_package_order(), arguments=kwargs)
            except:
                # fall back to a generic dataset
                self.dataset = Dataset(dataset_name=self.dataset_name, **kwargs)
        else:
            self.dataset = dataset_pool.get_dataset(self.dataset_name)
        if match:
            self.add_match(self.dataset)
        for dep_dataset_name, info in dependent_datasets.iteritems():
            if dataset_pool is None:
                dataset_pool = DatasetPool(storage=observed_data.get_storage(),
                                           package_order=observed_data.get_package_order())
            info.update({'in_storage': observed_data.get_storage(),
                         'in_table_name': info.get('filename')})
            del info['filename']
            match = False
            if 'match' in info.keys():
                match = info['match']
                del info['match']
            try:
                dep_dataset = DatasetFactory().search_for_dataset(
                    dep_dataset_name, observed_data.get_package_order(), arguments=info)
            except:
                dep_dataset = Dataset(dataset_name=dep_dataset_name, **info)
            dataset_pool.replace_dataset(dep_dataset_name, dep_dataset)
            if match:
                self.add_match(dep_dataset)
        if self.variable_name.get_alias() not in self.dataset.get_known_attribute_names():
            self.dataset.compute_variables([self.variable_name], dataset_pool=dataset_pool)
        if filter is not None:
            filter_values = self.dataset.compute_variables([filter], dataset_pool=dataset_pool)
            idx = where(filter_values > 0)[0]
            self.add_match(self.dataset, idx)
            self.dataset.subset_by_index(idx)
        self.transformation = transformation
        self.inverse_transformation = inverse_transformation
        if (self.transformation is not None) and (self.inverse_transformation is None):
            self.inverse_transformation = self.transformation_pairs[self.transformation]

    def get_values(self):
        return self.dataset.get_attribute(self.variable_name)

    def get_transformed_values(self):
        return try_transformation(self.get_values(),
                                  self.transformation)

    def get_variable_name(self):
        return self.variable_name

    def get_dataset(self):
        return self.dataset

    def get_dataset_name(self):
        return self.dataset_name

    def get_transformation(self):
        return self.transformation

    def get_transformation_pair(self):
        return (self.transformation, self.inverse_transformation)

    def add_match(self, dataset, index=None):
        dataset_name = dataset.get_dataset_name()
        result = zeros(dataset.size(), dtype='bool8')
        idx = index
        if index is None:
            idx = arange(dataset.size())
        result[idx] = 1
        if dataset_name in self.matching_datasets.keys():
            tmp = zeros(dataset.size(), dtype='bool8')
            tmp[dataset.get_id_index(self.matching_datasets[dataset_name])] = 1
            result = result * tmp
        self.matching_datasets[dataset_name] = dataset.get_id_attribute()[where(result)]

    def get_matching_datasets(self):
        return self.matching_datasets
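# Hedged construction sketch, not from the original source: 'observed_data' is
# assumed to be an ObservedData instance; the quantity expression, filename, and
# filter below are hypothetical.
#
#   quantity = ObservedDataOneQuantity(
#       'zone.aggregate(household.income)',          # hypothetical quantity to measure
#       observed_data,
#       filename='observed_zones',
#       transformation='log',                        # inverse ('exp') is looked up automatically
#       filter='zone.number_of_households > 0',      # applied to observed and simulated data
#       match=True)
#   values = quantity.get_transformed_values()       # log of the observed values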
        estimation_results = model_system.run_year_namespace[results_name]
        model_data.append({"data": data,
                           'index_chosen': index_chosen,
                           'sampling_probability': sampling_prob,
                           'variable_names': variable_names,
                           'estimation_results': estimation_results,
                           'model_name': model_name,
                           'choice_type': options.choice_type[h][i]})
        if options.market_share[h][i]:
            ms_expression = options.market_share[h][i]
            ms_variablename = VariableName(ms_expression)
            dataset_name = ms_variablename.get_dataset_name()
            ds = (model_system.run_year_namespace[dataset_name] or
                  model_system.run_year_namespace['datasets'][dataset_name])
            id_name = ds.get_id_name()[0]
            ds.compute_variables([ms_variablename], dataset_pool=dataset_pool)
            ms = ds.get_multiple_attributes([id_name, ms_variablename.get_alias()])
            market_ids = m.choice_set.compute_one_variable_with_unknown_package(
                id_name, dataset_pool=dataset_pool)
            market_ids_2d = market_ids[m.model_interaction.get_choice_index()]
            model_data[i].update({'market_id': market_ids_2d, 'market_share': ms})
    logger.end_block()
    training_data.append(model_data)

config = xmlconfig.get_run_configuration(options.scenario_name)
if not options.agents_index:
    agent_set = dataset_pool.get_dataset(options.agent_set)