def test_aggregate_all_mean(self):
    # aggregate_all with function=mean should average the zone attribute
    # over the single region.
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='zones',
                        table_data={'my_variable': array([4, 8, 10, 1]),
                                    'id': array([1, 2, 3, 4])})
    storage.write_table(table_name='regions',
                        table_data={"id": array([1])})
    zones = Dataset(in_storage=storage, in_table_name='zones',
                    id_name="id", dataset_name="myzone")
    regions = Dataset(in_storage=storage, in_table_name='regions',
                      id_name="id", dataset_name="myregion")
    pool = DatasetPool()
    pool._add_dataset('myzone', zones)
    pool._add_dataset('myregion', regions)
    regions.compute_variables(
        ["myvar = myregion.aggregate_all(myzone.my_variable, function=mean)"],
        dataset_pool=pool)
    actual = regions.get_attribute("myvar")
    expected = array([5.75])  # mean of [4, 8, 10, 1]
    self.assert_(ma.allclose(actual, expected, rtol=1e-6),
                 "Error in aggregate_all_mean")
def test_aggregate_bad_function(self):
    # The 'function' argument of aggregate must be a single name;
    # an arbitrary expression like 3+4 must be rejected with ValueError.
    expr = "zone.aggregate(2*gridcell.my_variable, function=3+4)"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='zones',
                        table_data={'zone_id': array([1, 2])})
    storage.write_table(table_name='gridcells',
                        table_data={'my_variable': array([4, 8, 0.5, 1]),
                                    'grid_id': array([1, 2, 3, 4]),
                                    'zone_id': array([1, 2, 1, 2])})
    zones = Dataset(in_storage=storage, in_table_name='zones',
                    id_name="zone_id", dataset_name='zone')
    gridcells = Dataset(in_storage=storage, in_table_name='gridcells',
                        id_name="grid_id", dataset_name='gridcell')
    pool = DatasetPool()
    pool._add_dataset('gridcell', gridcells)
    pool._add_dataset('zone', zones)
    self.assertRaises(ValueError, zones.compute_variables, [expr],
                      dataset_pool=pool)
def test_disaggregate_and_multiply(self):
    # Perform two different disaggregations and multiply the results.
    # This exercises using the same dataset (myfaz) both as the dataset being
    # disaggregated and as an intermediate in the other disaggregation.
    expr = "myzone.disaggregate(myfaz.fazsqft) * myzone.disaggregate(myfazdistr.my_variable, intermediates=[myfaz])"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='zones',
                        table_data={'id0': arange(7) + 1,
                                    'id1': array([1, 3, 1, 2, 3, 2, 1])})
    storage.write_table(table_name='fazes',
                        table_data={'id1': array([1, 2, 3]),
                                    'id2': array([1, 2, 1]),
                                    'fazsqft': array([10, 50, 100])})
    storage.write_table(table_name='fazdistrs',
                        table_data={'my_variable': array([40, 50]),
                                    'id2': array([1, 2])})
    zones = Dataset(in_storage=storage, in_table_name='zones',
                    id_name="id0", dataset_name="myzone")
    fazes = Dataset(in_storage=storage, in_table_name='fazes',
                    id_name="id1", dataset_name="myfaz")
    fazdistrs = Dataset(in_storage=storage, in_table_name='fazdistrs',
                        id_name="id2", dataset_name="myfazdistr")
    pool = DatasetPool()
    pool._add_dataset('myzone', zones)
    pool._add_dataset('myfaz', fazes)
    pool._add_dataset('myfazdistr', fazdistrs)
    actual = zones.compute_variables([expr], dataset_pool=pool)
    expected = array([400, 4000, 400, 2500, 4000, 2500, 400])
    self.assert_(ma.allclose(actual, expected, rtol=1e-6),
                 "Error in disaggregate_and_multiply")
def test_disaggregate_one_level(self):
    # Disaggregate a fazdistr attribute down to zones, passing through the
    # intermediate faz level.
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='zones',
                        table_data={'id0': arange(7) + 1,
                                    'id1': array([1, 3, 1, 2, 3, 2, 1])})
    storage.write_table(table_name='fazes',
                        table_data={'id1': array([1, 2, 3]),
                                    'id2': array([1, 2, 1])})
    storage.write_table(table_name='fazdistr',
                        table_data={'my_variable': array([40, 50]),
                                    'id2': array([1, 2])})
    zones = Dataset(in_storage=storage, in_table_name='zones',
                    id_name="id0", dataset_name="myzone")
    fazes = Dataset(in_storage=storage, in_table_name='fazes',
                    id_name="id1", dataset_name="myfaz")
    fazdistrs = Dataset(in_storage=storage, in_table_name='fazdistr',
                        id_name="id2", dataset_name="myfazdistr")
    pool = DatasetPool()
    pool._add_dataset('myzone', zones)
    pool._add_dataset('myfaz', fazes)
    pool._add_dataset('myfazdistr', fazdistrs)
    actual = zones.compute_variables(
        ["myzone.disaggregate(myfazdistr.my_variable, intermediates=[myfaz])"],
        dataset_pool=pool)
    expected = array([40, 40, 40, 50, 40, 50, 40])
    self.assert_(ma.allclose(actual, expected, rtol=1e-6),
                 "Error in disaggregate_one_level")
def test_disaggregate(self):
    # Basic one-step disaggregate: each zone picks up the value of its faz.
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='zones',
                        table_data={'id': array([1, 2, 3, 4]),
                                    'id2': array([1, 2, 1, 2])})
    storage.write_table(table_name='faz',
                        table_data={'my_variable': array([4, 8]),
                                    'id2': array([1, 2])})
    zones = Dataset(in_storage=storage, in_table_name='zones',
                    id_name="id", dataset_name="myzone")
    fazes = Dataset(in_storage=storage, in_table_name='faz',
                    id_name="id2", dataset_name="myfaz")
    pool = DatasetPool()
    pool._add_dataset('myzone', zones)
    pool._add_dataset('myfaz', fazes)
    actual = zones.compute_variables(["myzone.disaggregate(myfaz.my_variable)"],
                                     dataset_pool=pool)
    expected = array([4, 8, 4, 8])
    self.assert_(ma.allclose(actual, expected, rtol=1e-6),
                 "Error in disaggregate")
def test_aggregate(self):
    # aggregate with no function specified defaults to 'sum'.
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='zones',
                        table_data={'zone_id': array([1, 2])})
    storage.write_table(table_name='gridcells',
                        table_data={'my_variable': array([4, 8, 0.5, 1]),
                                    'grid_id': array([1, 2, 3, 4]),
                                    'zone_id': array([1, 2, 1, 2])})
    zones = Dataset(in_storage=storage, in_table_name='zones',
                    id_name="zone_id", dataset_name='zone')
    gridcells = Dataset(in_storage=storage, in_table_name='gridcells',
                        id_name="grid_id", dataset_name='gridcell')
    pool = DatasetPool()
    pool._add_dataset('gridcell', gridcells)
    pool._add_dataset('zone', zones)
    actual = zones.compute_variables(['zone.aggregate(gridcell.my_variable)'],
                                     dataset_pool=pool)
    expected = array([4.5, 9])  # sums per zone: 4+0.5 and 8+1
    self.assert_(ma.allclose(actual, expected, rtol=1e-6),
                 "Error in aggregate")
def test_change_with_index_and_filter(self):
    """The secondary dataset is restricted by index and filter."""
    primary_data = {'my_id': array([1, 2, 3, 4, 5, 6]),
                    'attr': array([10, 20, 30, 50, 46, 100]),
                    'attr2': array(6 * [1])}
    secondary_data = {'attr': array([20, 6, 7, 3, 10, 30, 100, 50])}
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='dataset', table_data=primary_data)
    primary = Dataset(in_storage=storage, in_table_name='dataset',
                      id_name='my_id')
    storage.write_table(table_name='dataset2', table_data=secondary_data)
    secondary = Dataset(in_storage=storage, in_table_name='dataset2',
                        id_name='attr')
    JoinAttributeModificationModel().run(primary, secondary,
                                         index=array([0, 1, 2, 7]),
                                         attribute_to_be_modified='attr2',
                                         filter='attr > 20')
    # Only id 50 (index 7, passing the attr > 20 filter) matches, so only
    # the fourth element of attr2 is zeroed out.
    self.assertEqual(
        ma.allequal(primary.get_attribute('attr2'),
                    array([1, 1, 1, 0, 1, 1])),
        True)
def test_join_by_rows(self):
    # join_by_rows appends ds2's rows to ds1 and leaves ds2 untouched.
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='dataset1',
                        table_data={'id': array([2, 4, 6, 8]),
                                    'attr': array([4, 7, 2, 1])})
    storage.write_table(table_name='dataset2',
                        table_data={'id': array([1, 5, 9]),
                                    'attr': array([55, 66, 100])})
    first = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
    second = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
    first.join_by_rows(second)
    self.assert_(ma.allclose(first.get_attribute('attr'),
                             array([4, 7, 2, 1, 55, 66, 100])))
    self.assert_(ma.allclose(second.get_attribute('attr'),
                             array([55, 66, 100])))
def test_aggregate_sum(self):
    # aggregate with an explicit sum function over a scaled expression.
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='zones',
                        table_data={'my_variable': array([4, 8, 0.5, 1]),
                                    'id': array([1, 2, 3, 4]),
                                    'id2': array([1, 2, 1, 2])})
    storage.write_table(table_name='faz',
                        table_data={"id2": array([1, 2])})
    zones = Dataset(in_storage=storage, in_table_name='zones',
                    id_name="id", dataset_name="myzone")
    fazes = Dataset(in_storage=storage, in_table_name='faz',
                    id_name="id2", dataset_name="myfaz")
    pool = DatasetPool()
    pool._add_dataset('myzone', zones)
    pool._add_dataset('myfaz', fazes)
    actual = fazes.compute_variables(
        ['myfaz.aggregate(10.0*myzone.my_variable, function=sum)'],
        dataset_pool=pool)
    expected = array([45, 90])  # 10*(4+0.5) and 10*(8+1)
    self.assert_(ma.allclose(actual, expected, rtol=1e-6),
                 "Error in aggregate_sum")
def setUp(self):
    # Build the household/gridcell fixture shared by the tests in this class.
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='households',
                        table_data={'household_id': arange(10) + 1,
                                    'grid_id': arange(-1, 9, 1) + 1,
                                    'lucky': array([1, 0, 1, 0, 1, 1, 1, 1, 0, 0])})
    storage.write_table(table_name='gridcells',
                        table_data={'grid_id': arange(15) + 1,
                                    'filter': array([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1]),
                                    'weight': array([0.1, 9, 15, 2, 5, 1, 6, 2.1, .3, 4, 3, 1, 10, 8, 7])})
    # households dataset
    self.households = Dataset(in_storage=storage, in_table_name='households',
                              id_name="household_id", dataset_name="household")
    # gridcells dataset
    self.gridcells = Dataset(in_storage=storage, in_table_name='gridcells',
                             id_name="grid_id", dataset_name="gridcell")
def test_disaggregate_fully_qualified_variable(self):
    # Disaggregate a fully-qualified (package.dataset.variable) variable.
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='zones',
                        table_data={'zone_id': array([1, 2, 3, 4]),
                                    'id': array([1, 2, 1, 2])})
    # it would be nicer to call this table 'fazzes' but we want to use the
    # existing test variable
    storage.write_table(table_name='test_locations',
                        table_data={'cost': array([4, 8]),
                                    'id': array([1, 2])})
    zones = Dataset(in_storage=storage, in_table_name='zones',
                    id_name="zone_id", dataset_name="zone")
    locations = Dataset(in_storage=storage, in_table_name='test_locations',
                        id_name="id", dataset_name='test_location')
    pool = DatasetPool()
    pool._add_dataset('zone', zones)
    pool._add_dataset('test_location', locations)
    actual = zones.compute_variables(
        ['zone.disaggregate(opus_core.test_location.cost_times_3)'],
        dataset_pool=pool)
    expected = array([12, 24, 12, 24])
    self.assert_(ma.allclose(actual, expected, rtol=1e-6),
                 "Error in test_disaggregate_fully_qualified_variable")
def test_join_by_rows_for_char_arrays(self):
    # join_by_rows must also work for string-valued attributes.
    from numpy import alltrue
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='dataset1',
                        table_data={'id': array([2, 4, 6, 8]),
                                    'attr': array(['4', '7', '2', '1'])})
    storage.write_table(table_name='dataset2',
                        table_data={'id': array([1, 5, 9]),
                                    'attr': array(['55', '66', '100'])})
    first = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
    second = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
    first.join_by_rows(second)
    self.assert_(alltrue(first.get_attribute('attr') ==
                         array(['4', '7', '2', '1', '55', '66', '100'])))
    self.assert_(alltrue(second.get_attribute('attr') ==
                         array(['55', '66', '100'])))
def test_aggregate_unqualified_name(self):
    # aggregate requires the aggregated variable to be qualified with its
    # dataset; an unqualified name must raise ValueError.
    expr = 'zone.aggregate(my_variable)'  # to be correct, should be 'zone.aggregate(gridcell.my_variable)'
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='zones',
                        table_data={'zone_id': array([1, 2])})
    storage.write_table(table_name='gridcells',
                        table_data={'my_variable': array([4, 8, 0.5, 1]),
                                    'grid_id': array([1, 2, 3, 4]),
                                    'zone_id': array([1, 2, 1, 2])})
    zones = Dataset(in_storage=storage, in_table_name='zones',
                    id_name="zone_id", dataset_name='zone')
    gridcells = Dataset(in_storage=storage, in_table_name='gridcells',
                        id_name="grid_id", dataset_name='gridcell')
    pool = DatasetPool()
    pool._add_dataset('gridcell', gridcells)
    pool._add_dataset('zone', zones)
    self.assertRaises(ValueError, zones.compute_variables, [expr],
                      dataset_pool=pool)
def test_agent_times_choice(self):
    # Test the agent_times_choice interaction variable: for each (agent, choice)
    # pair in the interaction set, it picks up the agent attribute named
    # 'attr_<choice name or id>'.
    expression = 'agent_x_choice.agent_times_choice(attr)'
    storage = StorageFactory().get_storage('dict_storage')
    # Agents carry one attribute per choice: attr_2, attr_3, attr_4 keyed by
    # choice id (there is deliberately no attr_1).
    storage.write_table(table_name='agents',
        table_data={'id': array([1, 2, 3, 4, 5]),
                    'attr_2': array([3, 2, 4, 10, 20]),
                    'attr_3': array([10, 100, 1000, 500, 0]),
                    'attr_4': array([100, 500, 0, 20, -30])
                    }
        )
    storage.write_table(table_name='choices',
        table_data={'id': array([1, 2, 3, 4])}
        )
    agents = Dataset(in_storage=storage, in_table_name='agents', dataset_name='agent', id_name='id')
    choices = Dataset(in_storage=storage, in_table_name='choices', dataset_name='choice', id_name='id')
    # Restrict the interaction set to agents 1,2,4,5 and choices 2,3,4.
    ids = InteractionDataset(dataset1=agents, dataset2=choices, index1=array([0,1,3,4]), index2=array([1,2,3]))
    result = ids.compute_variables(expression)
    should_be = array([[3, 10, 100], [2,100,500], [10,500, 20], [20, 0, -30]])
    self.assertEqual(ma.allequal(result, should_be), True)
    agents.touch_attribute('attr_2')  # in order to recompute the expression
    # Give choices names; choice 2 is now named 'car' but choice 3 is 'tran',
    # so attr_tran (added below) is used for the middle column on recompute.
    choices.add_primary_attribute(name='name', data=array(['bus', 'car', 'tran', 'walk']))
    agents.add_primary_attribute(name='attr_tran', data=array([100, 1000, 10000, 5000,10]))
    result = ids.compute_variables(expression)
    # Middle column now comes from attr_tran (choice named 'tran');
    # the other columns are unchanged.
    should_be = array([[3, 100, 100], [2,1000,500], [10,5000, 20], [20, 10, -30]])
    self.assertEqual(ma.allequal(result, should_be), True)
def test_join_datasets_with_2_ids(self):
    # join on a compound (two-column) id: attr1 is replaced by values from
    # ds2 matched on (id1, id2); attr2 is untouched.
    from numpy import ma
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='data1',
                        table_data={'id1': array([2, 4, 2]),
                                    'id2': array([1, 2, 3]),
                                    'attr1': array([4, 7, 1]),
                                    'attr2': array([100, 0, 1000])})
    storage.write_table(table_name='data2',
                        table_data={'id1': array([4, 2, 2]),
                                    'id2': array([2, 3, 1]),
                                    'attr1': array([50, 60, 70])})
    left = Dataset(in_storage=storage, in_table_name='data1',
                   id_name=['id1', 'id2'], dataset_name='data1')
    right = Dataset(in_storage=storage, in_table_name='data2',
                    id_name=['id1', 'id2'], dataset_name='data2')
    left.join(right, 'attr1')
    self.assertEqual(ma.allequal(left.get_attribute('attr1'),
                                 array([70, 50, 60])), True)
    self.assertEqual(ma.allequal(left.get_attribute('attr2'),
                                 array([100, 0, 1000])), True)
def test_aggregate_fully_qualified_variable(self):
    # Aggregate a fully-qualified (package.dataset.variable) variable.
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='zones',
                        table_data={'zone_id': array([1, 2])})
    # it would be nicer to call this table 'gridcells' but we want to use
    # the existing test variable
    storage.write_table(table_name='tests',
                        table_data={'a_dependent_variable': array([4, 8, 0.5, 1]),
                                    'id': array([1, 2, 3, 4]),
                                    'zone_id': array([1, 2, 1, 2])})
    zones = Dataset(in_storage=storage, in_table_name='zones',
                    id_name="zone_id", dataset_name='zone')
    tests = Dataset(in_storage=storage, in_table_name='tests',
                    id_name="id", dataset_name='tests')
    pool = DatasetPool()
    pool._add_dataset('zone', zones)
    pool._add_dataset('tests', tests)
    actual = zones.compute_variables(
        ['zone.aggregate(opus_core.tests.a_test_variable)'],
        dataset_pool=pool)
    expected = array([45, 90])
    self.assert_(ma.allclose(actual, expected, rtol=1e-6),
                 "Error in test_aggregate_fully_qualified_variable")
def test_aggregate_squared_with_cast(self):
    # more exercising the SUBPATTERN_NUMBER_OF_AGENTS_WITH_CAST tree pattern
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='zones',
                        table_data={'zone_id': array([1, 2])})
    storage.write_table(table_name='gridcells',
                        table_data={'my_variable': array([4, 8, 0.5, 1]),
                                    'grid_id': array([1, 2, 3, 4]),
                                    'zone_id': array([1, 2, 1, 2])})
    zones = Dataset(in_storage=storage, in_table_name='zones',
                    id_name="zone_id", dataset_name='zone')
    gridcells = Dataset(in_storage=storage, in_table_name='gridcells',
                        id_name="grid_id", dataset_name='gridcell')
    pool = DatasetPool()
    pool._add_dataset('gridcell', gridcells)
    pool._add_dataset('zone', zones)
    actual = zones.compute_variables(
        ['(zone.aggregate(gridcell.my_variable)**2).astype(float32)'],
        dataset_pool=pool)
    expected = array([4.5 * 4.5, 9.0 * 9.0])  # squared per-zone sums
    self.assert_(ma.allclose(actual, expected, rtol=1e-6),
                 "Error in aggregate")
def test_join_by_rows_for_unique_ids(self):
    """join_by_rows must refuse to join datasets whose id sets overlap."""
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='dataset1',
                        table_data={"id": array([2, 4]),
                                    "attr": array([4, 7])})
    storage.write_table(table_name='dataset2',
                        table_data={"id": array([1, 2]),
                                    "attr": array([55, 66])})
    ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
    ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
    # id 2 occurs in both datasets, so the join must fail.  Use assertRaises
    # instead of the manual try/except/flag pattern, consistent with the
    # other expected-failure tests in this file.
    self.assertRaises(StandardError, ds1.join_by_rows, ds2)
def test_number_of_agents_expression(self):
    # number_of_agents used inside a larger arithmetic expression.
    expr = "mygridcell.number_of_agents(myjob)+10"
    storage = StorageFactory().get_storage('dict_storage')
    gridcell_ids = array([1, 2, 3])
    # 4 jobs; the 1st job's grid_id = 2 (it's in gridcell 2), etc.
    job_gridcell_ids = array([2, 1, 3, 1])
    storage.write_table(table_name='gridcells',
                        table_data={'gid': gridcell_ids})
    storage.write_table(table_name='jobs',
                        table_data={'jid': arange(4) + 1,
                                    'gid': job_gridcell_ids})
    gridcells = Dataset(in_storage=storage, in_table_name='gridcells',
                        id_name="gid", dataset_name="mygridcell")
    jobs = Dataset(in_storage=storage, in_table_name='jobs',
                   id_name="jid", dataset_name="myjob")
    actual = gridcells.compute_variables(
        [expr],
        resources=Resources({"myjob": jobs, "mygridcell": gridcells}))
    expected = array([12, 11, 11])  # per-cell job counts [2, 1, 1] plus 10
    self.assert_(ma.allclose(actual, expected, rtol=1e-7),
                 msg="Error in " + expr)
def __init__(self, variable_name, observed_data, filename=None, transformation=None, inverse_transformation=None, 
             filter=None, match=False, dependent_datasets={}, **kwargs):
    """Wrap one observed quantity and the dataset holding its data.

    'variable_name' is a quantity about which we have data available.
    'observed_data' is of type ObservedData; it is the grouping parent.
    'filename' is the name of the file where the data is stored. It can be
    None if observed_data.directory is a cache.
    'transformation' is an operation to be performed on the data (e.g. sqrt,
    log); 'inverse_transformation' is its inverse. If not given, it is
    determined automatically from self.transformation_pairs.
    'filter' is a variable applied to both the observed and the simulated
    data.
    'match' (logical) determines if the dataset should be matched (by ids)
    with the simulated dataset; non-matching elements are eliminated from
    the simulated dataset.
    'dependent_datasets' (if any) should be a dictionary of
    dataset_name:{'filename': filename, 'match': True|False, **kwargs}.
    They will be added to the dataset_pool.
    Remaining arguments are passed into DatasetFactory, thus they can
    contain information about how to create the corresponding dataset.

    NOTE(review): 'dependent_datasets' is a mutable default argument and the
    per-entry 'info' dicts are mutated in place (del info['filename']) —
    callers' dicts are modified; confirm this is intended.
    """
    self.variable_name = VariableName(variable_name)
    self.dataset_name = self.variable_name.get_dataset_name()
    dataset_pool = observed_data.get_dataset_pool()
    self.matching_datasets = {}
    if dataset_pool is None:
        # No pool available: load the dataset directly from the parent's storage.
        kwargs.update({'in_storage':observed_data.get_storage(), 'in_table_name': filename})
        try:
            self.dataset = DatasetFactory().search_for_dataset(self.dataset_name, observed_data.get_package_order(), arguments=kwargs)
        except:  # fall back to a generic Dataset (bare except is deliberate best-effort here)
            self.dataset = Dataset(dataset_name=self.dataset_name, **kwargs)
    else:
        self.dataset = dataset_pool.get_dataset(self.dataset_name)
    if match:
        self.add_match(self.dataset)
    # Load any dependent datasets and register them in the pool.
    for dep_dataset_name, info in dependent_datasets.iteritems():
        if dataset_pool is None:
            # Create a pool lazily so dependent datasets have somewhere to live.
            dataset_pool = DatasetPool(storage=observed_data.get_storage(), package_order=observed_data.get_package_order())
        info.update({'in_storage':observed_data.get_storage(), 'in_table_name': info.get('filename')})
        del info['filename']
        match = False
        if 'match' in info.keys():
            match = info['match']
            del info['match']
        try:
            dep_dataset = DatasetFactory().search_for_dataset(dep_dataset_name, observed_data.get_package_order(), arguments=info)
        except:  # fall back to a generic Dataset, as above
            dep_dataset = Dataset(dataset_name=dep_dataset_name, **info)
        dataset_pool.replace_dataset(dep_dataset_name, dep_dataset)
        if match:
            self.add_match(dep_dataset)
    # Compute the observed variable unless it is already a known attribute.
    if self.variable_name.get_alias() not in self.dataset.get_known_attribute_names():
        self.dataset.compute_variables([self.variable_name], dataset_pool=dataset_pool)
    if filter is not None:
        # Keep only elements passing the filter; record them for matching.
        filter_values = self.dataset.compute_variables([filter], dataset_pool=dataset_pool)
        idx = where(filter_values > 0)[0]
        self.add_match(self.dataset, idx)
        self.dataset.subset_by_index(idx)
    self.transformation = transformation
    self.inverse_transformation = inverse_transformation
    if (self.transformation is not None) and (self.inverse_transformation is None):
        # Derive the inverse automatically from the known transformation pairs.
        self.inverse_transformation = self.transformation_pairs[self.transformation]
def _search_for_dataset_helper(self, dataset_name, package_order, use_hidden_id, **kwargs):
    # Search the given packages for a dataset class; if none is found, fall
    # back to constructing a generic Dataset from default table/id names.
    #
    # This part of the search_for_dataset code is factored into a helper
    # method, rather than passing in use_hidden_id as a keyword parameter
    # with a default value of False, so that we don't pass this keyword
    # parameter along to the get_dataset method.
    for package_name in package_order:
        try:
            dataset = self.get_dataset(dataset_name, package=package_name, **kwargs)
            if dataset is not None:
                break
        except ImportError:
            # The package does not provide this dataset; try the next one.
            continue
    else:
        # No package supplied the dataset: build a generic Dataset instead.
        from opus_core.datasets.dataset import Dataset
        from opus_core.resources import Resources
        resources = Resources(kwargs.get('arguments', {}))
        if use_hidden_id:
            id_name_default = []
        else:
            id_name_default = "%s_id" % dataset_name
        (table_name, module_name, class_name
         ) = self._table_module_class_names_for_dataset(dataset_name)
        ## set table_name and id_name_default as default values in resources (arguments)
        resources.merge_with_defaults({
            'dataset_name': dataset_name,
            'in_table_name': table_name,
            'out_table_name': table_name,
            'id_name': id_name_default
        })
        try:
            dataset = Dataset(resources=resources)
        except:
            # try to create a dataset using deprecated (plural) table names
            (table_name, module_name, class_name
             ) = self._table_module_class_names_for_dataset_deprecated(
                 dataset_name)
            resources = Resources(kwargs.get('arguments', {}))
            resources.merge_with_defaults({
                'dataset_name': dataset_name,
                'in_table_name': table_name,
                'out_table_name': table_name,
                'id_name': id_name_default
            })
            try:
                dataset = Dataset(resources=resources)
            except:
                # Both naming conventions failed; log and propagate.
                logger.log_warning(
                    "Could not create a generic Dataset '%s'." % dataset_name)
                raise
            #TODO: uncomment this warning when we change to singular
            #logger.log_warning("Dataset %s was created using deprecated table name - using plural will not be supported in the future." % dataset_name)
    return dataset
def prepare_for_estimate(specification_dict=None,
                         specification_storage=None,
                         specification_table=None,
                         agent_set=None,
                         household_set=None,
                         agents_for_estimation_storage=None,
                         agents_for_estimation_table=None,
                         households_for_estimation_table=None,
                         join_datasets=False,
                         filter=None,
                         data_objects=None):
    # Build the model specification and the index of agents to use for
    # estimation.  Returns (specification, index) where 'index' selects rows
    # of 'agent_set' (or None if no agent_set is given).
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    if agents_for_estimation_storage is not None:
        # Load the estimation agents (and optionally households) from the
        # separate estimation storage.
        estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                 in_table_name=agents_for_estimation_table,
                                 id_name=agent_set.get_id_name(),
                                 dataset_name=agent_set.get_dataset_name())
        hh_estimation_set = None
        if households_for_estimation_table is not None:
            hh_estimation_set = Dataset(
                in_storage=agents_for_estimation_storage,
                in_table_name=households_for_estimation_table,
                id_name=household_set.get_id_name(),
                dataset_name=household_set.get_dataset_name())
        filter_index = arange(estimation_set.size())
        if filter:
            # Restrict the estimation set to members passing the filter.
            estimation_set.compute_variables(filter,
                                             resources=Resources(data_objects))
            filter_index = where(estimation_set.get_attribute(filter) > 0)[0]
            #estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)
        if join_datasets:
            # Append the estimation rows to the main sets; the new rows sit at
            # the end of agent_set, so the index is the trailing range
            # narrowed by the filter.
            if hh_estimation_set is not None:
                household_set.join_by_rows(hh_estimation_set,
                                           require_all_attributes=False,
                                           change_ids_if_not_unique=True)
            agent_set.join_by_rows(estimation_set,
                                   require_all_attributes=False,
                                   change_ids_if_not_unique=True)
            index = arange(agent_set.size() - estimation_set.size(),
                           agent_set.size())[filter_index]
        else:
            # Estimation agents are assumed to already exist in agent_set;
            # map their ids back to positions.
            index = agent_set.get_id_index(
                estimation_set.get_id_attribute()[filter_index])
    else:
        # No separate estimation data: use all agents, if any.
        if agent_set is not None:
            index = arange(agent_set.size())
        else:
            index = None
    return (specification, index)
def test_aggregate_sum_two_levels(self):
    # Aggregate zone values up through two intermediate levels
    # (faz -> fazdistr) to neighborhoods, using sum.
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='zones',
                        table_data={'my_variable': array([4, 8, 2, 1, 40, 23, 78, 20, 25]),
                                    'id0': arange(9) + 1,
                                    'id1': array([1, 3, 1, 2, 3, 2, 1, 4, 4])})
    storage.write_table(table_name='fazes',
                        table_data={'id1': array([1, 2, 3, 4]),
                                    'id2': array([1, 2, 1, 3])})
    storage.write_table(table_name='fazdistrs',
                        table_data={'id2': array([1, 2, 3]),
                                    'id3': array([1, 2, 1])})
    storage.write_table(table_name='neighborhoods',
                        table_data={"id3": array([1, 2])})
    zones = Dataset(in_storage=storage, in_table_name='zones',
                    id_name="id0", dataset_name="myzone")
    fazes = Dataset(in_storage=storage, in_table_name='fazes',
                    id_name="id1", dataset_name="myfaz")
    fazdistrs = Dataset(in_storage=storage, in_table_name='fazdistrs',
                        id_name="id2", dataset_name="myfazdistr")
    neighborhoods = Dataset(in_storage=storage, in_table_name='neighborhoods',
                            id_name="id3", dataset_name="myneighborhood")
    pool = DatasetPool()
    pool._add_dataset('myzone', zones)
    pool._add_dataset('myfaz', fazes)
    pool._add_dataset('myfazdistr', fazdistrs)
    pool._add_dataset('myneighborhood', neighborhoods)
    actual = neighborhoods.compute_variables(
        ['myneighborhood.aggregate(myzone.my_variable, intermediates=[myfaz,myfazdistr], function=sum)'],
        dataset_pool=pool)
    expected = array([177, 24])
    self.assert_(ma.allclose(actual, expected, rtol=1e-6),
                 "Error in aggregate_sum_two_levels")
def test_versioning_with_aggregate(self):
    # Verify that an aggregate variable is recomputed exactly when one of
    # the id attributes it depends on changes version.
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='households',
                        table_data={'my_variable': array([4, 8, 2, 1, 40, 23, 78]),
                                    'id0': arange(7) + 1,
                                    'id1': array([1, 3, 1, 2, 3, 2, 1])})
    storage.write_table(table_name='fazes',
                        table_data={'id1': array([1, 2, 3]),
                                    'id2': array([1, 2, 1])})
    storage.write_table(table_name='fazdistr',
                        table_data={'id2': array([1, 2])})
    households = Dataset(in_storage=storage, in_table_name='households',
                         id_name="id0", dataset_name="myhousehold")
    fazes = Dataset(in_storage=storage, in_table_name='fazes',
                    id_name="id1", dataset_name="myfaz")
    fazdistrs = Dataset(in_storage=storage, in_table_name='fazdistr',
                        id_name="id2", dataset_name="myfazdistr")
    pool = DatasetPool()
    pool._add_dataset('myhousehold', households)
    pool._add_dataset('myfaz', fazes)
    pool._add_dataset('myfazdistr', fazdistrs)
    households.modify_attribute("id1", array([1, 3, 1, 2, 3, 2, 1]))  # has version 1
    variable = 'my_var = myfazdistr.aggregate(10.0*myhousehold.my_variable, intermediates=[myfaz])'
    fazdistrs.compute_variables([variable], dataset_pool=pool)
    self.assert_(fazdistrs.get_version("my_var") == 0)
    fazdistrs.compute_variables([variable], dataset_pool=pool)
    # version should stay the same, i.e. it should not recompute
    self.assert_(fazdistrs.get_version("my_var") == 0)
    households.touch_attribute("id1")  # has version 2
    fazdistrs.compute_variables([variable], dataset_pool=pool)
    # version should be 1, i.e. it should recompute when id changes
    self.assert_(fazdistrs.get_version("my_var") == 1)
    fazes.touch_attribute("id2")  # has version 1
    fazdistrs.compute_variables([variable], dataset_pool=pool)
    # version should be 2, i.e. it should recompute when id changes
    self.assert_(fazdistrs.get_version("my_var") == 2)
def test_alias_complex_expression(self):
    # Alias a complex expression, then access the result through the alias.
    expr = "x = 2*sqrt(var1+var2)"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='dataset',
                        table_data={"var1": array([4, -8, 0.5, 1]),
                                    "var2": array([3, 3, 7, 7]),
                                    "id": array([1, 2, 3, 4])})
    dataset = Dataset(in_storage=storage, in_table_name='dataset',
                      id_name="id", dataset_name="mydataset")
    result = dataset.compute_variables([expr])
    expected = array([5.29150262, 0.0, 5.47722558, 5.65685425])
    self.assert_(ma.allclose(result, expected, rtol=1e-6),
                 "Error in test_alias_complex_expression")
    # the new variable carries x as its alias
    v = VariableName(expr)
    self.assertEqual(v.get_alias(), 'x', msg="bad value for alias")
    # and the alias resolves to the same values
    result2 = dataset.compute_variables(['x'])
    self.assert_(ma.allclose(result2, expected, rtol=1e-6),
                 "Error in accessing a_test_variable")
def test_alias_attribute_same_name(self):
    # An expression aliasing a primary attribute to its own name must work
    # and must not trigger an autogenerated variable class.
    expr = "persons = persons"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='tests',
                        table_data={"persons": array([1, 5, 10]),
                                    "id": array([1, 3, 4])})
    dataset = Dataset(in_storage=storage, in_table_name='tests',
                      id_name="id", dataset_name="tests")
    result = dataset.compute_variables([expr])
    self.assertEqual(ma.allclose(result, [1, 5, 10], rtol=1e-7), True,
                     msg="error in test_alias_attribute")
    name = VariableName(expr)
    self.assertEqual(name.get_short_name(), 'persons',
                     msg="bad value for shortname")
    self.assertEqual(name.get_alias(), 'persons',
                     msg="bad value for alias")
    self.assertEqual(name.get_autogen_class(), None,
                     msg="bad value for autogen_class")
def test_alias_with_delete_computed_attributes(self):
    # Alias an expression, delete all computed attributes, then reuse the
    # same alias for a different expression.  This checks that the alias
    # dictionary is cleared when attributes are deleted.
    first_expr = "x = 2*sqrt(var1+var2)"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='dataset',
                        table_data={"var1": array([4, -8, 0.5, 1]),
                                    "var2": array([3, 3, 7, 7]),
                                    "id": array([1, 2, 3, 4])})
    dataset = Dataset(in_storage=storage, in_table_name='dataset',
                      id_name="id", dataset_name="mydataset")
    result = dataset.compute_variables([first_expr])
    expected = array([5.29150262, 0.0, 5.47722558, 5.65685425])
    self.assert_(ma.allclose(result, expected, rtol=1e-6),
                 "Error in test_alias_with_delete_computed_attributes")
    dataset.delete_computed_attributes()
    # now bind x to a different expression
    second_expr = "x = var1+10"
    result2 = dataset.compute_variables([second_expr])
    expected2 = array([14, 2, 10.5, 11])
    self.assert_(ma.allclose(result2, expected2, rtol=1e-6),
                 "Error in test_alias_with_delete_computed_attributes")
def test_alias_fully_qualified_variable_same_name(self):
    # Alias a fully-qualified variable to its own short name; no autogen
    # class should be generated, and the alias must resolve correctly.
    expr = "a_test_variable = opus_core.tests.a_test_variable"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='tests',
                        table_data={"a_dependent_variable": array([1, 5, 10]),
                                    "id": array([1, 3, 4])})
    dataset = Dataset(in_storage=storage, in_table_name='tests',
                      id_name="id", dataset_name="tests")
    result = dataset.compute_variables([expr])
    expected = array([10, 50, 100])
    self.assert_(ma.allclose(result, expected, rtol=1e-6),
                 "Error in test_alias_fully_qualified_variable")
    result2 = dataset.compute_variables(['a_test_variable'])
    self.assert_(ma.allclose(result2, expected, rtol=1e-6),
                 "Error in accessing a_test_variable")
    v = VariableName(expr)
    # no autogen class should have been generated
    self.assertEqual(v.get_autogen_class(), None,
                     msg="bad value for autogen_class")
    # and the alias should be the variable's own name
    self.assertEqual(v.get_alias(), 'a_test_variable',
                     msg="bad value for alias")
def test_alias_fully_qualified_variable(self):
    # Alias a fully-qualified variable to a new name; this requires an
    # autogenerated variable class, and the alias must resolve correctly.
    expr = "x = opus_core.tests.a_test_variable"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='tests',
                        table_data={"a_dependent_variable": array([1, 5, 10]),
                                    "id": array([1, 3, 4])})
    dataset = Dataset(in_storage=storage, in_table_name='tests',
                      id_name="id", dataset_name="tests")
    result = dataset.compute_variables([expr])
    expected = array([10, 50, 100])
    self.assert_(ma.allclose(result, expected, rtol=1e-6),
                 "Error in test_alias_fully_qualified_variable")
    # check the parsed name's properties
    v = VariableName(expr)
    self.assertEqual(v.get_package_name(), None,
                     msg="bad value for package_name")
    self.assertEqual(v.get_dataset_name(), 'tests',
                     msg="bad value for dataset_name")
    self.assert_(v.get_short_name().startswith('autogen'),
                 msg="bad value for shortname")
    self.assertEqual(v.get_alias(), 'x', msg="bad value for alias")
    self.assertNotEqual(v.get_autogen_class(), None,
                        msg="bad value for autogen_class")
    # the alias resolves to the same values
    result2 = dataset.compute_variables(['x'])
    self.assert_(ma.allclose(result2, expected, rtol=1e-6),
                 "Error in accessing a_test_variable")
def test_alias_attribute(self):
    # An expression that is just an alias for a primary attribute.
    expr = "p = persons"
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='tests',
                        table_data={"persons": array([1, 5, 10]),
                                    "id": array([1, 3, 4])})
    dataset = Dataset(in_storage=storage, in_table_name='tests',
                      id_name="id", dataset_name="tests")
    result = dataset.compute_variables([expr])
    self.assertEqual(ma.allclose(result, [1, 5, 10], rtol=1e-7), True,
                     msg="error in test_alias_attribute")
    # the access methods for the variable all return the correct values
    name = VariableName(expr)
    self.assertEqual(name.get_package_name(), None,
                     msg="bad value for package")
    self.assertEqual(name.get_dataset_name(), None,
                     msg="bad value for dataset")
    self.assert_(name.get_short_name().startswith('autogen'),
                 msg="bad value for shortname")
    self.assertEqual(name.get_alias(), 'p', msg="bad value for alias")
    self.assertNotEqual(name.get_autogen_class(), None,
                        msg="bad value for autogen_class")