def test_aggregate_all_mean(self):
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='zones',
                         table_data={
                             'my_variable': array([4, 8, 10, 1]),
                             'id': array([1, 2, 3, 4]),
                         })
     storage.write_table(table_name='regions',
                         table_data={
                             "id": array([1]),
                         })
     ds = Dataset(in_storage=storage,
                  in_table_name='zones',
                  id_name="id",
                  dataset_name="myzone")
     ds2 = Dataset(in_storage=storage,
                   in_table_name='regions',
                   id_name="id",
                   dataset_name="myregion")
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('myzone', ds)
     dataset_pool._add_dataset('myregion', ds2)
      ds2.compute_variables(
          ["myvar = myregion.aggregate_all(myzone.my_variable, function=mean)"],
          dataset_pool=dataset_pool)
     values = ds2.get_attribute("myvar")
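      # mean of [4, 8, 10, 1] is 23 / 4 = 5.75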
     should_be = array([5.75])
     self.assert_(ma.allclose(values, should_be, rtol=1e-6),
                  "Error in aggregate_all_mean")
 def test_aggregate_bad_function(self):
     # the 'function' argument must be a single name -- test this
     expr = "zone.aggregate(2*gridcell.my_variable, function=3+4)"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='zones',
                         table_data={
                             'zone_id': array([1, 2]),
                         })
     storage.write_table(table_name='gridcells',
                         table_data={
                             'my_variable': array([4, 8, 0.5, 1]),
                             'grid_id': array([1, 2, 3, 4]),
                             'zone_id': array([1, 2, 1, 2]),
                         })
     zone_dataset = Dataset(in_storage=storage,
                            in_table_name='zones',
                            id_name="zone_id",
                            dataset_name='zone')
     gridcell_dataset = Dataset(in_storage=storage,
                                in_table_name='gridcells',
                                id_name="grid_id",
                                dataset_name='gridcell')
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('gridcell', gridcell_dataset)
     dataset_pool._add_dataset('zone', zone_dataset)
     self.assertRaises(ValueError,
                       zone_dataset.compute_variables, [expr],
                       dataset_pool=dataset_pool)
 def test_disaggregate_and_multiply(self):
     # Perform two different disaggregations and multiply the results.  This tests using a dataset name in both the
     # list of intermediates and as the dataset being disaggregated (myfaz in this case).
     expr = "myzone.disaggregate(myfaz.fazsqft) * myzone.disaggregate(myfazdistr.my_variable, intermediates=[myfaz])"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='zones',
         table_data={
             'id0':arange(7)+1,
             'id1':array([1,3,1,2,3,2,1])
             }
         )
     storage.write_table(table_name='fazes',
         table_data={
             'id1':array([1,2,3]),
             'id2':array([1,2,1]),
             'fazsqft':array([10,50,100])
             }
         )
     storage.write_table(table_name='fazdistrs',
         table_data={
             'my_variable':array([40,50]), 
             'id2':array([1,2])
             }
         )
     ds0 = Dataset(in_storage=storage, in_table_name='zones', id_name="id0", dataset_name="myzone")
     ds1 = Dataset(in_storage=storage, in_table_name='fazes', id_name="id1", dataset_name="myfaz")             
     ds2 = Dataset(in_storage=storage, in_table_name='fazdistrs', id_name="id2", dataset_name="myfazdistr")
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('myzone', ds0)
     dataset_pool._add_dataset('myfaz', ds1)
     dataset_pool._add_dataset('myfazdistr', ds2)
     values = ds0.compute_variables([expr], dataset_pool=dataset_pool)
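      # e.g. zone 1: its faz (id1=1) has fazsqft 10 and lies in fazdistr 1 (my_variable 40), so 10 * 40 = 400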
     should_be = array([400, 4000, 400, 2500, 4000, 2500, 400])
     self.assert_(ma.allclose(values, should_be, rtol=1e-6), "Error in disaggregate_and_multiply")
 def test_disaggregate_one_level(self):
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='zones',
         table_data={
             'id0':arange(7)+1,
             'id1':array([1,3,1,2,3,2,1])
             }
         )
     storage.write_table(table_name='fazes',
         table_data={
             'id1':array([1,2,3]),
             'id2':array([1,2,1])
             }
         )
     storage.write_table(table_name='fazdistr',
         table_data={
             'my_variable':array([40,50]), 
             'id2':array([1,2])
             }
         )
     ds0 = Dataset(in_storage=storage, in_table_name='zones', id_name="id0", dataset_name="myzone")
     ds1 = Dataset(in_storage=storage, in_table_name='fazes', id_name="id1", dataset_name="myfaz")             
     ds2 = Dataset(in_storage=storage, in_table_name='fazdistr', id_name="id2", dataset_name="myfazdistr")
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('myzone', ds0)
     dataset_pool._add_dataset('myfaz', ds1)
     dataset_pool._add_dataset('myfazdistr', ds2)
     values = ds0.compute_variables(["myzone.disaggregate(myfazdistr.my_variable, intermediates=[myfaz])"], dataset_pool=dataset_pool)
      should_be = array([40, 40, 40, 50, 40, 50, 40])
      self.assert_(ma.allclose(values, should_be, rtol=1e-6), "Error in disaggregate_one_level")
 def test_disaggregate(self):
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='zones',
                         table_data={
                             'id': array([1, 2, 3, 4]),
                             'id2': array([1, 2, 1, 2])
                         })
     storage.write_table(table_name='faz',
                         table_data={
                             'my_variable': array([4, 8]),
                             'id2': array([1, 2])
                         })
     ds = Dataset(in_storage=storage,
                  in_table_name='zones',
                  id_name="id",
                  dataset_name="myzone")
     ds2 = Dataset(in_storage=storage,
                   in_table_name='faz',
                   id_name="id2",
                   dataset_name="myfaz")
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('myzone', ds)
     dataset_pool._add_dataset('myfaz', ds2)
     values = ds.compute_variables(
         ["myzone.disaggregate(myfaz.my_variable)"],
         dataset_pool=dataset_pool)
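      # zones 1 and 3 belong to faz 1 (value 4), zones 2 and 4 to faz 2 (value 8)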
     should_be = array([4, 8, 4, 8])
     self.assert_(ma.allclose(values, should_be, rtol=1e-6),
                  "Error in disaggregate")
 def test_aggregate(self):
     # test aggregate with no function specified (so defaults to 'sum')
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='zones',
                         table_data={
                             'zone_id': array([1, 2]),
                         })
     storage.write_table(table_name='gridcells',
                         table_data={
                             'my_variable': array([4, 8, 0.5, 1]),
                             'grid_id': array([1, 2, 3, 4]),
                             'zone_id': array([1, 2, 1, 2]),
                         })
     zone_dataset = Dataset(in_storage=storage,
                            in_table_name='zones',
                            id_name="zone_id",
                            dataset_name='zone')
     gridcell_dataset = Dataset(in_storage=storage,
                                in_table_name='gridcells',
                                id_name="grid_id",
                                dataset_name='gridcell')
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('gridcell', gridcell_dataset)
     dataset_pool._add_dataset('zone', zone_dataset)
     values = zone_dataset.compute_variables(
         ['zone.aggregate(gridcell.my_variable)'],
         dataset_pool=dataset_pool)
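      # with no function given, aggregate defaults to sum:
      # zone 1: 4 + 0.5 = 4.5, zone 2: 8 + 1 = 9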
     should_be = array([4.5, 9])
     self.assert_(ma.allclose(values, should_be, rtol=1e-6),
                  "Error in aggregate")
    def test_change_with_index_and_filter(self):
        """The secondary dataset is restricted by index and filter."""
        data = {
            'my_id': array([1, 2, 3, 4, 5, 6]),
            'attr': array([10, 20, 30, 50, 46, 100]),
            'attr2': array(6 * [1])
        }
        data2 = {'attr': array([20, 6, 7, 3, 10, 30, 100, 50])}
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='dataset', table_data=data)
        dataset = Dataset(in_storage=storage,
                          in_table_name='dataset',
                          id_name='my_id')
        storage.write_table(table_name='dataset2', table_data=data2)
        dataset2 = Dataset(in_storage=storage,
                           in_table_name='dataset2',
                           id_name='attr')
        JoinAttributeModificationModel().run(dataset,
                                             dataset2,
                                             index=array([0, 1, 2, 7]),
                                             attribute_to_be_modified='attr2',
                                             filter='attr > 20')
        self.assertEqual(
            ma.allequal(dataset.get_attribute('attr2'),
                        array([1, 1, 1, 0, 1, 1])), True)
 def test_join_by_rows(self):
     storage = StorageFactory().get_storage('dict_storage')
     
     storage.write_table(
         table_name='dataset1', 
         table_data={    
             'id':array([2,4,6,8]), 
             'attr':array([4,7,2,1])
             }
         )
         
     storage.write_table(
         table_name='dataset2',
         table_data={
             'id':array([1,5,9]), 
             'attr':array([55,66,100])
             }
         )
     
     ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
     ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
     
     ds1.join_by_rows(ds2)
     self.assert_(ma.allclose(ds1.get_attribute('attr'), array([4,7,2,1,55,66,100])))
     self.assert_(ma.allclose(ds2.get_attribute('attr'), array([55,66,100])))
 def test_aggregate_sum(self):
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='zones',
                         table_data={
                             'my_variable': array([4, 8, 0.5, 1]),
                             'id': array([1, 2, 3, 4]),
                             'id2': array([1, 2, 1, 2]),
                         })
     storage.write_table(table_name='faz',
                         table_data={"id2": array([1, 2])})
     ds = Dataset(in_storage=storage,
                  in_table_name='zones',
                  id_name="id",
                  dataset_name="myzone")
     ds2 = Dataset(in_storage=storage,
                   in_table_name='faz',
                   id_name="id2",
                   dataset_name="myfaz")
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('myzone', ds)
     dataset_pool._add_dataset('myfaz', ds2)
     values = ds2.compute_variables(
         ['myfaz.aggregate(10.0*myzone.my_variable, function=sum)'],
         dataset_pool=dataset_pool)
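      # faz 1: 10 * (4 + 0.5) = 45, faz 2: 10 * (8 + 1) = 90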
     should_be = array([45, 90])
     self.assert_(ma.allclose(values, should_be, rtol=1e-6),
                  "Error in aggregate_sum")
    def setUp(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='households',
                            table_data={
                                'household_id': arange(10) + 1,
                                'grid_id': arange(-1, 9, 1) + 1,
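                                 # arange(-1, 9, 1) + 1 yields [0, 1, ..., 9]; the
                                 # first household gets grid_id 0 (presumably unplaced)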
                                'lucky': array([1, 0, 1, 0, 1, 1, 1, 1, 0, 0])
                            })

        storage.write_table(
            table_name='gridcells',
            table_data={
                 'grid_id': arange(15) + 1,
                 'filter': array([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1]),
                 'weight': array([0.1, 9, 15, 2, 5, 1, 6, 2.1, .3, 4, 3, 1, 10, 8, 7])
            })

         # create households
        self.households = Dataset(in_storage=storage,
                                  in_table_name='households',
                                  id_name="household_id",
                                  dataset_name="household")

        # create gridcells
        self.gridcells = Dataset(in_storage=storage,
                                 in_table_name='gridcells',
                                 id_name="grid_id",
                                 dataset_name="gridcell")
 def test_disaggregate_fully_qualified_variable(self):
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='zones',
                         table_data={
                             'zone_id': array([1, 2, 3, 4]),
                             'id': array([1, 2, 1, 2])
                         })
     # it would be nicer to call this table 'fazzes' but we want to use the existing test variable
     storage.write_table(table_name='test_locations',
                         table_data={
                             'cost': array([4, 8]),
                             'id': array([1, 2])
                         })
     zone_dataset = Dataset(in_storage=storage,
                            in_table_name='zones',
                            id_name="zone_id",
                            dataset_name="zone")
     test_dataset = Dataset(in_storage=storage,
                            in_table_name='test_locations',
                            id_name="id",
                            dataset_name='test_location')
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('zone', zone_dataset)
     dataset_pool._add_dataset('test_location', test_dataset)
     values = zone_dataset.compute_variables(
         ['zone.disaggregate(opus_core.test_location.cost_times_3)'],
         dataset_pool=dataset_pool)
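      # cost_times_3 is 3 * cost, i.e. [12, 24]; zones map to test_locations 1, 2, 1, 2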
     should_be = array([12, 24, 12, 24])
     self.assert_(ma.allclose(values, should_be, rtol=1e-6),
                  "Error in test_disaggregate_fully_qualified_variable")
 def test_join_by_rows_for_char_arrays(self):
     from numpy import alltrue
     storage = StorageFactory().get_storage('dict_storage')
     
     storage.write_table(
         table_name='dataset1', 
         table_data={
             'id':array([2,4,6,8]), 
             'attr':array(['4','7','2','1'])
             }
         )
         
     storage.write_table(
         table_name='dataset2',
         table_data={
             'id':array([1,5,9]), 
             'attr':array(['55','66','100'])
             }
         )
     
     ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
     ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
     
     ds1.join_by_rows(ds2)
     self.assert_(alltrue(ds1.get_attribute('attr') == array(['4','7','2','1','55','66','100'])))
     self.assert_(alltrue(ds2.get_attribute('attr') == array(['55','66','100'])))
 def test_aggregate_unqualified_name(self):
     # test aggregate without the dataset provided for the variable being aggregated
     expr = 'zone.aggregate(my_variable)'
     # to be correct, should be 'zone.aggregate(gridcell.my_variable)'
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='zones',
                         table_data={
                             'zone_id': array([1, 2]),
                         })
     storage.write_table(table_name='gridcells',
                         table_data={
                             'my_variable': array([4, 8, 0.5, 1]),
                             'grid_id': array([1, 2, 3, 4]),
                             'zone_id': array([1, 2, 1, 2]),
                         })
     zone_dataset = Dataset(in_storage=storage,
                            in_table_name='zones',
                            id_name="zone_id",
                            dataset_name='zone')
     gridcell_dataset = Dataset(in_storage=storage,
                                in_table_name='gridcells',
                                id_name="grid_id",
                                dataset_name='gridcell')
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('gridcell', gridcell_dataset)
     dataset_pool._add_dataset('zone', zone_dataset)
     self.assertRaises(ValueError,
                       zone_dataset.compute_variables, [expr],
                       dataset_pool=dataset_pool)
 def test_agent_times_choice(self):
     expression = 'agent_x_choice.agent_times_choice(attr)'
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='agents', 
         table_data={'id': array([1, 2, 3, 4, 5]), 'attr_2': array([3,   2,   4,   10, 20]), 
                                                   'attr_3': array([10, 100, 1000, 500, 0]),
                                                   'attr_4': array([100, 500, 0, 20, -30])
                     }
         )
     storage.write_table(table_name='choices', 
         table_data={'id': array([1, 2, 3, 4])}
         )
     agents = Dataset(in_storage=storage, in_table_name='agents', dataset_name='agent', id_name='id')
     choices = Dataset(in_storage=storage, in_table_name='choices', dataset_name='choice', id_name='id')
     ids = InteractionDataset(dataset1=agents, dataset2=choices, index1=array([0,1,3,4]), index2=array([1,2,3])) 
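      # rows are agents 1, 2, 4, 5 (index1) and columns choices 2, 3, 4 (index2);
      # agent_times_choice evidently selects the agent's attr_<choice_id> per column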
     result = ids.compute_variables(expression)
      should_be = array([[3, 10, 100], [2, 100, 500], [10, 500, 20], [20, 0, -30]])
     self.assertEqual(ma.allequal(result, should_be), True)
     
     agents.touch_attribute('attr_2') # in order to recompute the expression
     choices.add_primary_attribute(name='name', data=array(['bus', 'car', 'tran', 'walk']))
     agents.add_primary_attribute(name='attr_tran', data=array([100, 1000, 10000, 5000,10]))
     result = ids.compute_variables(expression)
      should_be = array([[3, 100, 100], [2, 1000, 500], [10, 5000, 20], [20, 10, -30]])
     self.assertEqual(ma.allequal(result, should_be), True)
 def test_join_datasets_with_2_ids(self):
     from numpy import ma
     storage = StorageFactory().get_storage('dict_storage')
     
     storage.write_table(
         table_name='data1',
         table_data={
             'id1':array([2,4,2]),
             'id2':array([1,2,3]),
             'attr1':array([4,7,1]),
             'attr2':array([100,0,1000]),
             }
         )
     storage.write_table(
         table_name='data2',
         table_data={
             'id1':array([4,2,2]),
             'id2':array([2,3,1]),
             'attr1':array([50,60,70])
             }
         )
     
     ds1 = Dataset(in_storage=storage, in_table_name='data1', id_name=['id1', 'id2'], dataset_name='data1')
     ds2 = Dataset(in_storage=storage, in_table_name='data2', id_name=['id1', 'id2'], dataset_name='data2')
     ds1.join(ds2, 'attr1')
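      # rows are matched on the composite key (id1, id2): (2,1) -> 70, (4,2) -> 50, (2,3) -> 60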
     self.assertEqual(ma.allequal(ds1.get_attribute('attr1'), array([70,50,60])), True)
     self.assertEqual(ma.allequal(ds1.get_attribute('attr2'), array([100,0,1000])), True)
 def test_aggregate_fully_qualified_variable(self):
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='zones',
                         table_data={
                             'zone_id': array([1, 2]),
                         })
     # it would be nicer to call this table 'gridcells' but we want to use the existing test variable
     storage.write_table(table_name='tests',
                         table_data={
                             'a_dependent_variable': array([4, 8, 0.5, 1]),
                             'id': array([1, 2, 3, 4]),
                             'zone_id': array([1, 2, 1, 2]),
                         })
     zone_dataset = Dataset(in_storage=storage,
                            in_table_name='zones',
                            id_name="zone_id",
                            dataset_name='zone')
     test_dataset = Dataset(in_storage=storage,
                            in_table_name='tests',
                            id_name="id",
                            dataset_name='tests')
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('zone', zone_dataset)
     dataset_pool._add_dataset('tests', test_dataset)
     values = zone_dataset.compute_variables(
         ['zone.aggregate(opus_core.tests.a_test_variable)'],
         dataset_pool=dataset_pool)
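      # a_test_variable is 10 * a_dependent_variable, i.e. [40, 80, 5, 10];
      # zone 1 sums 40 + 5 = 45, zone 2 sums 80 + 10 = 90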
     should_be = array([45, 90])
     self.assert_(ma.allclose(values, should_be, rtol=1e-6),
                  "Error in test_aggregate_fully_qualified_variable")
 def test_aggregate_squared_with_cast(self):
     # more exercising the SUBPATTERN_NUMBER_OF_AGENTS_WITH_CAST tree pattern
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='zones',
                         table_data={
                             'zone_id': array([1, 2]),
                         })
     storage.write_table(table_name='gridcells',
                         table_data={
                             'my_variable': array([4, 8, 0.5, 1]),
                             'grid_id': array([1, 2, 3, 4]),
                             'zone_id': array([1, 2, 1, 2]),
                         })
     zone_dataset = Dataset(in_storage=storage,
                            in_table_name='zones',
                            id_name="zone_id",
                            dataset_name='zone')
     gridcell_dataset = Dataset(in_storage=storage,
                                in_table_name='gridcells',
                                id_name="grid_id",
                                dataset_name='gridcell')
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('gridcell', gridcell_dataset)
     dataset_pool._add_dataset('zone', zone_dataset)
     values = zone_dataset.compute_variables(
         ['(zone.aggregate(gridcell.my_variable)**2).astype(float32)'],
         dataset_pool=dataset_pool)
     should_be = array([4.5 * 4.5, 9.0 * 9.0])
     self.assert_(ma.allclose(values, should_be, rtol=1e-6),
                  "Error in aggregate")
 def test_join_by_rows_for_unique_ids(self):
     storage = StorageFactory().get_storage('dict_storage')
     
     storage.write_table(
         table_name='dataset1', 
         table_data={
             "id":array([2,4]), 
             "attr":array([4,7])
             }
         )
         
     storage.write_table(
         table_name='dataset2',
         table_data={
             "id":array([1,2]), 
             "attr":array([55,66])
             }
         )
     
     ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
     ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
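      # the id sets overlap (both tables contain id 2), so join_by_rows should refuse to concatenate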
     
     threw_exception = False
     try: 
         ds1.join_by_rows(ds2)
     except StandardError:
         threw_exception = True
     self.assert_(threw_exception)
 def test_number_of_agents_expression(self):
     expr = "mygridcell.number_of_agents(myjob)+10"
     storage = StorageFactory().get_storage('dict_storage')
      gridcell_grid_id = array([1, 2, 3])
      # specify an array of 4 jobs: the 1st job's grid_id is 2 (it is in gridcell 2), etc.
      job_grid_id = array([2, 1, 3, 1])
     storage.write_table(table_name='gridcells',
                         table_data={'gid': gridcell_grid_id})
     storage.write_table(table_name='jobs',
                         table_data={
                             'jid': arange(4) + 1,
                             'gid': job_grid_id
                         })
     gs = Dataset(in_storage=storage,
                  in_table_name='gridcells',
                  id_name="gid",
                  dataset_name="mygridcell")
     jobs = Dataset(in_storage=storage,
                    in_table_name='jobs',
                    id_name="jid",
                    dataset_name="myjob")
      values = gs.compute_variables(
          [expr], resources=Resources({"myjob": jobs, "mygridcell": gs}))
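      # gridcell 1 holds jobs 2 and 4, gridcell 2 holds job 1, gridcell 3 holds job 3;
      # number_of_agents gives [2, 1, 1], and +10 gives [12, 11, 11]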
     should_be = array([12, 11, 11])
     self.assert_(ma.allclose(values, should_be, rtol=1e-7),
                  msg="Error in " + expr)
 def __init__(self, variable_name, observed_data, filename=None, transformation=None,
              inverse_transformation=None, filter=None, match=False,
              dependent_datasets={}, **kwargs):
     """  'variable_name' is a quantity about which we have data available.
     'observed_data' is of type ObservedData, it is the grouping parent. 
     'filename' is the name of file where 
     the data is stored. It can be None, if the observed_data.directory is a cache.
     'transformation' is an operation to be performed on the data (e.g. sqrt, log),
     'inverse_transformation' is the inverse function of 'transformation'. If it not given, it
     is determined automatically.
     'filter' is a variable that will be applied to both, the observed data and the simulated data.
     'match' (logical) determines if the dataset should be matched (by ids) with the simulated dataset. Elements
     that don't match are eliminated from the simulated dataset.
     'dependent_datasets' (if any) should be a dictionary of dataset_name:{'filename': filename, 'match': True|False, **kwargs}. 
     They will be added to the dataset_pool. 
     Remaining arguments are passed into DatasetFactory, thus it can contain information about how 
     to create the corresponding dataset.
     """
     self.variable_name = VariableName(variable_name)
     self.dataset_name = self.variable_name.get_dataset_name()
     dataset_pool = observed_data.get_dataset_pool()
     self.matching_datasets = {}
     
     if dataset_pool is None:
         kwargs.update({'in_storage':observed_data.get_storage(), 'in_table_name': filename})
         try:
             self.dataset = DatasetFactory().search_for_dataset(self.dataset_name, observed_data.get_package_order(), arguments=kwargs)
         except: # take generic dataset
             self.dataset = Dataset(dataset_name=self.dataset_name, **kwargs)
     else:
         self.dataset = dataset_pool.get_dataset(self.dataset_name)
     if match:
         self.add_match(self.dataset)
     for dep_dataset_name, info in dependent_datasets.iteritems():
         if dataset_pool is None:
             dataset_pool = DatasetPool(storage=observed_data.get_storage(), package_order=observed_data.get_package_order())
         info.update({'in_storage':observed_data.get_storage(), 'in_table_name': info.get('filename')})
         del info['filename']
         match = False
         if 'match' in info.keys():
             match = info['match']
             del info['match']
         try:
             dep_dataset = DatasetFactory().search_for_dataset(dep_dataset_name, observed_data.get_package_order(), arguments=info)
         except:
             dep_dataset = Dataset(dataset_name=dep_dataset_name, **info)
         dataset_pool.replace_dataset(dep_dataset_name, dep_dataset)
         if match:
             self.add_match(dep_dataset)
     if self.variable_name.get_alias() not in self.dataset.get_known_attribute_names():
         self.dataset.compute_variables([self.variable_name], dataset_pool=dataset_pool)
     if filter is not None:
         filter_values = self.dataset.compute_variables([filter], dataset_pool=dataset_pool)
         idx = where(filter_values > 0)[0]
         self.add_match(self.dataset, idx)
         self.dataset.subset_by_index(idx)
     self.transformation = transformation
     self.inverse_transformation = inverse_transformation
     if (self.transformation is not None) and (self.inverse_transformation is None):
         self.inverse_transformation = self.transformation_pairs[self.transformation]
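      # A minimal usage sketch (hypothetical names: 'observed_data' would be an
      # existing ObservedData instance whose storage holds a table
      # 'households_observed'; the enclosing class name is not shown in this excerpt):
      #   quantity = <this class>('zone.number_of_households', observed_data,
      #                           filename='households_observed',
      #                           transformation='sqrt', match=True)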
    def _search_for_dataset_helper(self, dataset_name, package_order,
                                   use_hidden_id, **kwargs):
        # this part of the search_for_dataset code is factored into a helper method, rather than passing in
        # use_hidden_id as a keyword parameter with a default value of False, so that we don't pass this
        # keyword parameter along to the get_dataset method
        for package_name in package_order:
            try:
                dataset = self.get_dataset(dataset_name,
                                           package=package_name,
                                           **kwargs)
                if dataset is not None:
                    break
            except ImportError:
                continue
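        # for/else: this branch runs only if the loop finished without 'break',
        # i.e. no package in package_order produced the dataset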
        else:
            from opus_core.datasets.dataset import Dataset
            from opus_core.resources import Resources

            resources = Resources(kwargs.get('arguments', {}))
            if use_hidden_id:
                id_name_default = []
            else:
                id_name_default = "%s_id" % dataset_name
             (table_name, module_name, class_name) = \
                 self._table_module_class_names_for_dataset(dataset_name)
            ## set table_name and id_name_default as default values in resources (arguments)
            resources.merge_with_defaults({
                'dataset_name': dataset_name,
                'in_table_name': table_name,
                'out_table_name': table_name,
                'id_name': id_name_default
            })
            try:
                dataset = Dataset(resources=resources)
            except:
                # try to create a dataset using deprecated values
                 (table_name, module_name, class_name) = \
                     self._table_module_class_names_for_dataset_deprecated(dataset_name)
                resources = Resources(kwargs.get('arguments', {}))
                resources.merge_with_defaults({
                    'dataset_name': dataset_name,
                    'in_table_name': table_name,
                    'out_table_name': table_name,
                    'id_name': id_name_default
                })
                try:
                    dataset = Dataset(resources=resources)
                except:
                    logger.log_warning(
                        "Could not create a generic Dataset '%s'." %
                        dataset_name)
                    raise
                #TODO: uncomment this warning when we change to singular
                #logger.log_warning("Dataset %s was created using deprecated table name - using plural will not be supported in the future." % dataset_name)
        return dataset
def prepare_for_estimate(specification_dict=None,
                         specification_storage=None,
                         specification_table=None,
                         agent_set=None,
                         household_set=None,
                         agents_for_estimation_storage=None,
                         agents_for_estimation_table=None,
                         households_for_estimation_table=None,
                         join_datasets=False,
                         filter=None,
                         data_objects=None):
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    if agents_for_estimation_storage is not None:
        estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                 in_table_name=agents_for_estimation_table,
                                 id_name=agent_set.get_id_name(),
                                 dataset_name=agent_set.get_dataset_name())
        hh_estimation_set = None
        if households_for_estimation_table is not None:
            hh_estimation_set = Dataset(
                in_storage=agents_for_estimation_storage,
                in_table_name=households_for_estimation_table,
                id_name=household_set.get_id_name(),
                dataset_name=household_set.get_dataset_name())

        filter_index = arange(estimation_set.size())
        if filter:
            estimation_set.compute_variables(filter,
                                             resources=Resources(data_objects))
            filter_index = where(estimation_set.get_attribute(filter) > 0)[0]
            #estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

        if join_datasets:
            if hh_estimation_set is not None:
                household_set.join_by_rows(hh_estimation_set,
                                           require_all_attributes=False,
                                           change_ids_if_not_unique=True)

            agent_set.join_by_rows(estimation_set,
                                   require_all_attributes=False,
                                   change_ids_if_not_unique=True)
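             # joined rows were appended at the end of agent_set, so their indices
             # are the tail [agent_set.size() - estimation_set.size(), agent_set.size())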
            index = arange(agent_set.size() - estimation_set.size(),
                           agent_set.size())[filter_index]
        else:
            index = agent_set.get_id_index(
                estimation_set.get_id_attribute()[filter_index])
    else:
        if agent_set is not None:
            index = arange(agent_set.size())
        else:
            index = None

    return (specification, index)
 def test_aggregate_sum_two_levels(self):
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='zones',
                         table_data={
                              'my_variable': array([4, 8, 2, 1, 40, 23, 78, 20, 25]),
                              'id0': arange(9) + 1,
                              'id1': array([1, 3, 1, 2, 3, 2, 1, 4, 4])
                         })
     storage.write_table(table_name='fazes',
                         table_data={
                             'id1': array([1, 2, 3, 4]),
                             'id2': array([1, 2, 1, 3])
                         })
     storage.write_table(table_name='fazdistrs',
                         table_data={
                             'id2': array([1, 2, 3]),
                             'id3': array([1, 2, 1])
                         })
     storage.write_table(table_name='neighborhoods',
                         table_data={"id3": array([1, 2])})
     ds0 = Dataset(in_storage=storage,
                   in_table_name='zones',
                   id_name="id0",
                   dataset_name="myzone")
     ds1 = Dataset(in_storage=storage,
                   in_table_name='fazes',
                   id_name="id1",
                   dataset_name="myfaz")
     ds2 = Dataset(in_storage=storage,
                   in_table_name='fazdistrs',
                   id_name="id2",
                   dataset_name="myfazdistr")
     ds3 = Dataset(in_storage=storage,
                   in_table_name='neighborhoods',
                   id_name="id3",
                   dataset_name="myneighborhood")
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('myzone', ds0)
     dataset_pool._add_dataset('myfaz', ds1)
     dataset_pool._add_dataset('myfazdistr', ds2)
     dataset_pool._add_dataset('myneighborhood', ds3)
      values = ds3.compute_variables(
          ['myneighborhood.aggregate(myzone.my_variable, intermediates=[myfaz,myfazdistr], function=sum)'],
          dataset_pool=dataset_pool)
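      # neighborhood 1 covers fazdistrs 1 and 3: (4 + 2 + 78) + (8 + 40) + (20 + 25) = 177;
      # neighborhood 2 covers fazdistr 2: 1 + 23 = 24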
     should_be = array([177, 24])
     self.assert_(ma.allclose(values, should_be, rtol=1e-6),
                  "Error in aggregate_sum_two_levels")
 def test_versioning_with_aggregate(self):
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='households',
                         table_data={
                             'my_variable': array([4, 8, 2, 1, 40, 23, 78]),
                             'id0': arange(7) + 1,
                             'id1': array([1, 3, 1, 2, 3, 2, 1])
                         })
     storage.write_table(table_name='fazes',
                         table_data={
                             'id1': array([1, 2, 3]),
                             'id2': array([1, 2, 1])
                         })
     storage.write_table(table_name='fazdistr',
                         table_data={'id2': array([1, 2])})
     ds0 = Dataset(in_storage=storage,
                   in_table_name='households',
                   id_name="id0",
                   dataset_name="myhousehold")
     ds1 = Dataset(in_storage=storage,
                   in_table_name='fazes',
                   id_name="id1",
                   dataset_name="myfaz")
     ds2 = Dataset(in_storage=storage,
                   in_table_name='fazdistr',
                   id_name="id2",
                   dataset_name="myfazdistr")
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('myhousehold', ds0)
     dataset_pool._add_dataset('myfaz', ds1)
     dataset_pool._add_dataset('myfazdistr', ds2)
      ds0.modify_attribute("id1", array([1, 3, 1, 2, 3, 2, 1]))  # has version 1
     variable = 'my_var = myfazdistr.aggregate(10.0*myhousehold.my_variable, intermediates=[myfaz])'
     ds2.compute_variables([variable], dataset_pool=dataset_pool)
     self.assert_(ds2.get_version("my_var") == 0)
     ds2.compute_variables([variable], dataset_pool=dataset_pool)
      self.assert_(ds2.get_version("my_var") == 0)  # version should stay the same, i.e. it should not recompute
     ds0.touch_attribute("id1")  # has version 2
     ds2.compute_variables([variable], dataset_pool=dataset_pool)
      self.assert_(ds2.get_version("my_var") == 1)  # version should be 1, i.e. it should recompute when the id changes
     ds1.touch_attribute("id2")  # has version 1
     ds2.compute_variables([variable], dataset_pool=dataset_pool)
      self.assert_(ds2.get_version("my_var") == 2)  # version should be 2, i.e. it should recompute when the id changes
 def test_alias_complex_expression(self):
     # aliasing a complex expression
     expr = "x = 2*sqrt(var1+var2)"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='dataset',
                         table_data={
                             "var1": array([4, -8, 0.5, 1]),
                             "var2": array([3, 3, 7, 7]),
                             "id": array([1, 2, 3, 4])
                         })
     dataset = Dataset(in_storage=storage,
                       in_table_name='dataset',
                       id_name="id",
                       dataset_name="mydataset")
     result = dataset.compute_variables([expr])
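      # note: for the second element, var1 + var2 = -5; taking sqrt of a negative
      # presumably yields a masked value here, which ma.allclose treats as matching 0.0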
     should_be = array([5.29150262, 0.0, 5.47722558, 5.65685425])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6),
                  "Error in test_alias_complex_expression")
     # check that the new var has x as an alias
     v = VariableName(expr)
     self.assertEqual(v.get_alias(), 'x', msg="bad value for alias")
     # check that the alias gives the correct value
     result2 = dataset.compute_variables(['x'])
     self.assert_(ma.allclose(result2, should_be, rtol=1e-6),
                  "Error in accessing a_test_variable")
 def test_alias_attribute_same_name(self):
     # this tests an expression consisting of an alias for a primary attribute that is the same name as the primary attribute
     expr = "persons = persons"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='tests',
                         table_data={
                             "persons": array([1, 5, 10]),
                             "id": array([1, 3, 4])
                         })
     dataset = Dataset(in_storage=storage,
                       in_table_name='tests',
                       id_name="id",
                       dataset_name="tests")
     result = dataset.compute_variables([expr])
     self.assertEqual(ma.allclose(result, [1, 5, 10], rtol=1e-7),
                      True,
                      msg="error in test_alias_attribute")
     name = VariableName(expr)
     self.assertEqual(name.get_short_name(),
                      'persons',
                      msg="bad value for shortname")
     self.assertEqual(name.get_alias(),
                      'persons',
                      msg="bad value for alias")
     self.assertEqual(name.get_autogen_class(),
                      None,
                      msg="bad value for autogen_class")
 def test_alias_with_delete_computed_attributes(self):
     # Make an alias for an expression, then delete all computed attributes, then use the same alias
     # for a different expression.  This tests that the dictionary of aliases that have been defined
     # is cleared when you delete attributes.
     expr1 = "x = 2*sqrt(var1+var2)"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='dataset',
                         table_data={
                             "var1": array([4, -8, 0.5, 1]),
                             "var2": array([3, 3, 7, 7]),
                             "id": array([1, 2, 3, 4])
                         })
     dataset = Dataset(in_storage=storage,
                       in_table_name='dataset',
                       id_name="id",
                       dataset_name="mydataset")
     result = dataset.compute_variables([expr1])
     should_be = array([5.29150262, 0.0, 5.47722558, 5.65685425])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6),
                  "Error in test_alias_with_delete_computed_attributes")
     dataset.delete_computed_attributes()
     # now alias x to a different expression
     expr2 = "x = var1+10"
     # check that the new var has x as an alias
     result2 = dataset.compute_variables([expr2])
     should_be2 = array([14, 2, 10.5, 11])
     self.assert_(ma.allclose(result2, should_be2, rtol=1e-6),
                  "Error in test_alias_with_delete_computed_attributes")
 def test_alias_fully_qualified_variable_same_name(self):
     expr = "a_test_variable = opus_core.tests.a_test_variable"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='tests',
                         table_data={
                             "a_dependent_variable": array([1, 5, 10]),
                             "id": array([1, 3, 4])
                         })
     dataset = Dataset(in_storage=storage,
                       in_table_name='tests',
                       id_name="id",
                       dataset_name="tests")
     result = dataset.compute_variables([expr])
     should_be = array([10, 50, 100])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6),
                  "Error in test_alias_fully_qualified_variable")
     result2 = dataset.compute_variables(['a_test_variable'])
     self.assert_(ma.allclose(result2, should_be, rtol=1e-6),
                  "Error in accessing a_test_variable")
     v = VariableName(expr)
     # check that no autogen class was generated
     self.assertEqual(v.get_autogen_class(),
                      None,
                      msg="bad value for autogen_class")
     # check that the alias is correct
     self.assertEqual(v.get_alias(),
                      'a_test_variable',
                      msg="bad value for alias")
 def test_alias_fully_qualified_variable(self):
     expr = "x = opus_core.tests.a_test_variable"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='tests',
                         table_data={
                             "a_dependent_variable": array([1, 5, 10]),
                             "id": array([1, 3, 4])
                         })
     dataset = Dataset(in_storage=storage,
                       in_table_name='tests',
                       id_name="id",
                       dataset_name="tests")
     result = dataset.compute_variables([expr])
     should_be = array([10, 50, 100])
     self.assert_(ma.allclose(result, should_be, rtol=1e-6),
                  "Error in test_alias_fully_qualified_variable")
     # check that the new var has x as an alias
     v = VariableName(expr)
     self.assertEqual(v.get_package_name(),
                      None,
                      msg="bad value for package_name")
     self.assertEqual(v.get_dataset_name(),
                      'tests',
                      msg="bad value for dataset_name")
     self.assert_(v.get_short_name().startswith('autogen'),
                  msg="bad value for shortname")
     self.assertEqual(v.get_alias(), 'x', msg="bad value for alias")
     self.assertNotEqual(v.get_autogen_class(),
                         None,
                         msg="bad value for autogen_class")
     # check that the alias has the correct value
     result2 = dataset.compute_variables(['x'])
     self.assert_(ma.allclose(result2, should_be, rtol=1e-6),
                  "Error in accessing a_test_variable")
 def test_alias_attribute(self):
     # this tests an expression consisting of an alias for a primary attribute
     expr = "p = persons"
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='tests',
                         table_data={
                             "persons": array([1, 5, 10]),
                             "id": array([1, 3, 4])
                         })
     dataset = Dataset(in_storage=storage,
                       in_table_name='tests',
                       id_name="id",
                       dataset_name="tests")
     result = dataset.compute_variables([expr])
     self.assertEqual(ma.allclose(result, [1, 5, 10], rtol=1e-7),
                      True,
                      msg="error in test_alias_attribute")
     # check that the access methods for the variable all return the correct values
     name = VariableName(expr)
     self.assertEqual(name.get_package_name(),
                      None,
                      msg="bad value for package")
     self.assertEqual(name.get_dataset_name(),
                      None,
                      msg="bad value for dataset")
     self.assert_(name.get_short_name().startswith('autogen'),
                  msg="bad value for shortname")
     self.assertEqual(name.get_alias(), 'p', msg="bad value for alias")
     self.assertNotEqual(name.get_autogen_class(),
                         None,
                         msg="bad value for autogen_class")