Example #1
 def test_join_by_rows_for_char_arrays(self):
     from numpy import alltrue
     storage = StorageFactory().get_storage('dict_storage')
     
     storage.write_table(
         table_name='dataset1', 
         table_data={
             'id':array([2,4,6,8]), 
             'attr':array(['4','7','2','1'])
             }
         )
         
     storage.write_table(
         table_name='dataset2',
         table_data={
             'id':array([1,5,9]), 
             'attr':array(['55','66','100'])
             }
         )
     
     ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
     ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
     
     ds1.join_by_rows(ds2)
     self.assert_(alltrue(ds1.get_attribute('attr') == array(['4','7','2','1','55','66','100'])))
     self.assert_(alltrue(ds2.get_attribute('attr') == array(['55','66','100'])))
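
The snippets in this collection are excerpts and omit their imports. A minimal preamble that would make most of them runnable is sketched below; the numpy names are standard, but the opus_core module paths are assumptions based on the usual Opus/UrbanSim layout and may differ between versions.

    # Sketch of the imports the examples assume (the opus_core paths are a
    # best guess for the Opus/UrbanSim layout and may vary by version).
    from numpy import array, arange, zeros, ones, where, unique, sqrt, log, int32
    from numpy import ma
    from opus_core.storage_factory import StorageFactory
    from opus_core.datasets.dataset import Dataset
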
Example #2
 def create_edges(self, input_file_dir, input_file_name, output_file_name):
     storage = StorageFactory().get_storage(type='tab_storage', subdir='store', 
         storage_location=input_file_dir)
     dataset = Dataset(in_storage = storage, id_name = ['stop_id','sch_time'], in_table_name = input_file_name)
     
     n = dataset.size()
     trip_ids = dataset.get_attribute("stop_id")
     unique_trip_ids = unique(trip_ids)
     source_list = list()
     target_list = list()
     time_list = list()
     
     for trip in unique_trip_ids:
         idx = where(dataset.get_attribute("stop_id") == trip)[0]
         nodes = dataset.get_attribute_by_index("node_id", idx)
         times = dataset.get_attribute_by_index("sch_time", idx)
         for inode in range(nodes.size-1):
             source_list.append(nodes[inode])
             target_list.append(nodes[inode+1])
             time_list.append(times[inode+1] - times[inode])
    
     storage = StorageFactory().get_storage('dict_storage')
     
     storage.write_table(table_name='edges',
         table_data={
             'edge_id': arange(len(source_list))+1, 
             'source': array(source_list),  # output field, stored as an array
             'target': array(target_list),  # output field, stored as an array
             'cost': array(time_list, dtype=int32)
             }
         )
    
     edges = Dataset(in_storage=storage, in_table_name='edges', id_name = "edge_id")
     
     edges.write_dataset(attributes = ["source", "target", "cost"], out_storage = storage, out_table_name = output_file_name)
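
For each trip, the inner loop emits one edge per pair of consecutive stops, with cost equal to the scheduled-time difference. The same pairing can be expressed without the Python-level loop via array slicing; this is an equivalent sketch, not part of the original code.

    import numpy as np

    def consecutive_edges(nodes, times):
        # nodes[:-1] -> nodes[1:] are the consecutive stop pairs;
        # np.diff gives the scheduled travel time between them.
        return nodes[:-1], nodes[1:], np.diff(times)

    src, tgt, cost = consecutive_edges(np.array([10, 11, 12]), np.array([0, 5, 9]))
    # src == [10, 11], tgt == [11, 12], cost == [5, 4]
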
Example #3
 def test_join_by_rows(self):
     storage = StorageFactory().get_storage('dict_storage')
     
     storage.write_table(
         table_name='dataset1', 
         table_data={    
             'id':array([2,4,6,8]), 
             'attr':array([4,7,2,1])
             }
         )
         
     storage.write_table(
         table_name='dataset2',
         table_data={
             'id':array([1,5,9]), 
             'attr':array([55,66,100])
             }
         )
     
     ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
     ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
     
     ds1.join_by_rows(ds2)
     self.assert_(ma.allclose(ds1.get_attribute('attr'), array([4,7,2,1,55,66,100])))
     self.assert_(ma.allclose(ds2.get_attribute('attr'), array([55,66,100])))
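
As the two assertions show, join_by_rows appends the rows of the second dataset to the first and leaves the second dataset untouched. In plain numpy terms the attribute arrays are simply concatenated, which can be checked independently (a sketch of the semantics, not the Opus implementation):

    import numpy as np

    attr1 = np.array([4, 7, 2, 1])
    attr2 = np.array([55, 66, 100])
    joined = np.concatenate((attr1, attr2))
    assert (joined == np.array([4, 7, 2, 1, 55, 66, 100])).all()
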
Example #4
    def test_change_three_elements(self):
        """3 values are in common - change them to -1. Other attributes stay unchanged."""
        data = {
           'my_id': array([1,2,3,4,5]),
           'attr':  array([10,2,3,50,2]),
           'attr2': array([4,3,2,5,3])    
                }
        data2 = {
            'attr': array([2,6,7,3])
                 }
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='dataset', table_data=data)
        dataset = Dataset(in_storage = storage, 
                           in_table_name='dataset',
                           id_name='my_id'
                           )
        storage.write_table(table_name='dataset2', table_data=data2)
        dataset2 = Dataset(in_storage = storage, 
                           in_table_name='dataset2',
                           id_name='attr'
                           )
        JoinAttributeModificationModel().run(dataset,dataset2, value=-1)
        self.assertEqual(ma.allequal(dataset.get_attribute('attr'), array([10,-1,-1,50,-1])), True)
        self.assertEqual(ma.allequal(dataset.get_attribute('attr2'), data['attr2']), True)
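
JoinAttributeModificationModel matches dataset.attr against the id values of dataset2 (whose id_name is 'attr') and overwrites the matching elements with the given value. A minimal numpy sketch of that matching logic reproduces the expected result; it illustrates the semantics only, not the model's actual code.

    import numpy as np

    attr = np.array([10, 2, 3, 50, 2])
    ids2 = np.array([2, 6, 7, 3])
    attr[np.in1d(attr, ids2)] = -1      # the values 2, 3, 2 are in common
    assert (attr == np.array([10, -1, -1, 50, -1])).all()
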
Example #5
class SimpleModelTest(opus_unittest.OpusTestCase):
    def setUp(self):
        self.data = {
            'id': arange(10) + 1,
            'attribute': array([3000, 2800, 1000, 550, 600, 1000, 2000, 500, 100, 1000])
        }
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name='dataset', table_data=self.data)
        self.dataset = Dataset(in_storage=storage,
                               in_table_name='dataset',
                               id_name=['id'])

    def test_simple_model(self):
        m = SimpleModel()
        m.run(self.dataset,
              'sqrt(dataset.attribute)',
              outcome_attribute='sqrtattr')
        self.assertEqual(
            ma.allclose(self.dataset.get_attribute('sqrtattr'),
                        sqrt(self.data['attribute'])), True)
        self.assertEqual(
            'sqrtattr' in self.dataset.get_primary_attribute_names(), True)

    def test_simple_model_without_outcome_attribute(self):
        m = SimpleModel()
        m.run(self.dataset, 'lattr = ln(dataset.attribute)')
        self.assertEqual(
            ma.allclose(self.dataset.get_attribute('lattr'),
                        log(self.data['attribute'])), True)
        self.assertEqual('lattr' in self.dataset.get_primary_attribute_names(),
                         True)
Example #6
 def test_simple_lag_variable(self):
     test_data = {
         1000:{
             'tests':{
                 'id':array([1,2,3]),
                 'attr1':array([10,20,30]),
                 },
             },
         1001:{
             'tests':{
                 'id':array([1,2,3]),
                 'attr1':array([111,222,333]),
                 },
             },
         }
     cache_creator = CreateTestAttributeCache()
     cache_creator.create_attribute_cache_with_data(self._temp_dir, test_data)
     
     SimulationState().set_current_time(1001)
     
     attribute_cache = AttributeCache()
     SessionConfiguration(new_instance=True,
                          package_order=['opus_core'],
                          in_storage=attribute_cache)
     
     ds = Dataset(in_storage = attribute_cache, 
                  in_table_name = 'tests', 
                  id_name = ['id'], 
                  dataset_name = 'tests')
     
     ds.compute_variables(['opus_core.tests.attr1'])
     self.assert_(ma.allequal(ds.get_attribute('attr1'), array([111,222,333])))
     
     ds.compute_variables(['opus_core.tests.attr1_lag1'])
     self.assert_(ma.allequal(ds.get_attribute('attr1_lag1'), array([10,20,30])))
Example #7
 def test_flush_dataset_correct_data(self):
     job_set = Dataset(self.job_set_resources, dataset_name="jobs")
     job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
     job_set.add_attribute(self.expected_sic_data, "sic", metadata=AttributeType.COMPUTED)
     job_set.flush_dataset()
     returned_sic_data = job_set.get_attribute("sic")
     returned_id_data = job_set.get_attribute("job_id")
     self.assert_(ma.allequal(returned_id_data,self.job_id))
     self.assert_(ma.allequal(returned_sic_data,self.expected_sic_data))
Example #8
class SimpleModelTest(opus_unittest.OpusTestCase):
    def setUp(self):
        self.data = {
            'id': arange(10)+1,
            'attribute':  array([3000,2800,1000,550,600,1000,2000,500,100,1000]),
            'sqrt_outcome': zeros(10)
        }
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name = 'dataset', table_data = self.data)
        self.dataset = Dataset(in_storage=storage, in_table_name='dataset', id_name=['id'])

    def test_simple_model(self):
        m = SimpleModel()
        m.run(self.dataset, 'sqrt(dataset.attribute)', outcome_attribute='sqrtattr')
        self.assertEqual(ma.allclose(self.dataset.get_attribute('sqrtattr'), sqrt(self.data['attribute'])), True)
        self.assertEqual('sqrtattr' in self.dataset.get_primary_attribute_names(), True)

    def test_simple_model_with_filter(self):
        m = SimpleModel()
        m.run(self.dataset, 'sqrt(dataset.attribute)', outcome_attribute='sqrt_outcome', dataset_filter='dataset.attribute>1000')
        expected = array([1, 1, 0, 0, 0, 0, 1, 0, 0, 0]) * sqrt(self.data['attribute'])
        self.assertEqual(ma.allclose(self.dataset.get_attribute('sqrt_outcome'), 
                                     expected), True)
        self.assertEqual('sqrt_outcome' in self.dataset.get_primary_attribute_names(), True)

    def MASKEDtest_simple_model_with_random_filter(self):
        m = SimpleModel()
        m.run(self.dataset, 'sqrt(dataset.attribute)', 
              outcome_attribute='sqrt_outcome', 
              dataset_filter='(dataset.attribute>=1000) & (random_like(dataset.attribute)<=0.5)',
             )
        con_filter = self.dataset['attribute']>=1000
        results = self.dataset['sqrt_outcome'][con_filter]
        expected = sqrt(self.data['attribute'])[con_filter]
        # test that half of the elements passing the filter received the sqrt value
        self.assertEqual((results==expected).sum(), expected.size/2)
        self.assertEqual((results!=expected).sum(), expected.size/2)
        
    def test_simple_model_without_outcome_attribute(self):
        m = SimpleModel()
        m.run(self.dataset, 'lattr = ln(dataset.attribute)')
        self.assertEqual(ma.allclose(self.dataset.get_attribute('lattr'), log(self.data['attribute'])), True)
        self.assertEqual('lattr' in self.dataset.get_primary_attribute_names(), True)
        
    def test_simple_model_with_outcome_values(self):
        m = SimpleModel()
        m.run(self.dataset,  outcome_attribute='iniattr', outcome_values=zeros(10)-1)
        self.assertEqual(ma.allclose(self.dataset.get_attribute('iniattr'), array(10*[-1])), True)
        self.assertEqual('iniattr' in self.dataset.get_primary_attribute_names(), True)
        # run with filter
        m.run(self.dataset,  outcome_attribute='iniattr', outcome_values=arange(10)+1, dataset_filter='dataset.attribute>1000')
        expected = array([1, 2, -1, -1, -1, -1, 7, -1, -1, -1])
        self.assertEqual(ma.allclose(self.dataset.get_attribute('iniattr'), expected), True)
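
Note how the filtered run in test_simple_model_with_outcome_values only overwrites the elements passing the filter, while the rest keep the value from the previous run. The expected array can be derived with a plain numpy.where (a sketch of the semantics only):

    import numpy as np

    attribute = np.array([3000, 2800, 1000, 550, 600, 1000, 2000, 500, 100, 1000])
    previous = np.zeros(10) - 1          # outcome of the first, unfiltered run
    new_values = np.arange(10) + 1
    expected = np.where(attribute > 1000, new_values, previous)
    # expected == [1, 2, -1, -1, -1, -1, 7, -1, -1, -1]
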
Example #9
        def _determine_exogenous_attribute_names(self, resources):
            exogenous_attribute_names = {}

            try:
                storage = copy.deepcopy(resources['in_storage'])
            except:
                pass
            else:
                exogenous_relationships_dataset = Dataset(
                    in_storage=storage,
                    in_table_name='exogenous_relationships',
                    id_name='exogenous_id')

                try:
                    base_tables = exogenous_relationships_dataset.get_attribute(
                        'base_table')
                except:
                    logger.log_warning(
                        "An exogenous_relationships table was found, but did not contain the 'base_table' attribute."
                    )
                    return {}

                try:
                    exogenous_tables = exogenous_relationships_dataset.get_attribute(
                        'exogenous_table')
                except:
                    logger.log_warning(
                        "An exogenous_relationships table was found, but did not contain the 'exogenous_table' attribute."
                    )
                    return {}

                relationships = zip(base_tables, exogenous_tables)

                for base_table, exogenous_table in relationships:
                    if base_table == resources['in_table_name']:
                        exogenous_attributes = self.determine_stored_attribute_names(
                            in_storage=resources['in_storage'],
                            in_table_name=exogenous_table)
                        for exogenous_attribute in exogenous_attributes:
                            try:
                                exogenous_attribute_names[exogenous_attribute]
                            except:
                                exogenous_attribute_names[
                                    exogenous_attribute] = exogenous_table
                            else:
                                raise AttributeError(
                                    "Duplicate exogenous "
                                    "attribute '%s' found in '%s' and '%s'." %
                                    (exogenous_attribute,
                                     exogenous_attribute_names[
                                         exogenous_attribute],
                                     exogenous_table))
            return exogenous_attribute_names
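
The try/except/else around the dictionary lookup above is Python 2-era style for duplicate detection; the same check reads more directly with a membership test. A self-contained sketch (the attribute and table names here are hypothetical, purely for illustration):

    def record_attribute(names, attribute, table):
        # names maps each exogenous attribute to the table providing it
        if attribute in names:
            raise AttributeError(
                "Duplicate exogenous attribute '%s' found in '%s' and '%s'."
                % (attribute, names[attribute], table))
        names[attribute] = table

    names = {}
    record_attribute(names, 'population', 'households_exogenous')   # ok
    # record_attribute(names, 'population', 'jobs_exogenous')       # would raise
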
Example #10
    def test_estimation_without_procedure(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='dataset',
                            table_data={
                                "id": array([1, 2, 3, 4]),
                                "attr1": array([4, 7, 2, 1]),
                                "attr2": array([6.8, 2.6, 0, 1]),
                                "submodel_id": array([1, 2, 2, 1]),
                                "outcome": array([0, 1, 0, 1])
                            })

        ds = Dataset(in_storage=storage, in_table_name='dataset', id_name="id")
        specification = EquationSpecification(
            variables=array(["constant", "attr2", "attr1"]),
            coefficients=array(["constant", "ba2", "ba1"]))

        model = RegressionModel()
        model.estimate(specification, ds, "outcome")
        data_attr1 = model.get_data("ba1")
        self.assert_(ma.allequal(ds.get_attribute("attr1"), data_attr1),
                     msg="Error in getting data from regression model")

        specification_2subm = EquationSpecification(
            variables=array(["constant", "attr2", "constant", "attr1"]),
            coefficients=array(["constant", "ba2", "constant", "ba1"]),
            submodels=array([1, 1, 2, 2]))

        model = RegressionModel(submodel_string="submodel_id")
        model.estimate(specification_2subm, ds, "outcome")
        data_attr1 = model.get_data("ba1", 1)
        self.assert_(
            data_attr1 == None,
            msg=
            "Error in getting data from regression model with multiple submodels."
        )
        data_attr1 = model.get_data("ba1", 2)
        self.assert_(
            ma.allequal(ds.get_attribute("attr1")[1:3], data_attr1),
            msg=
            "Error in getting data from regression model with multiple submodels."
        )
        d = model.get_data_as_dataset(2)
        self.assert_(
            ma.allequal(
                ds.get_attribute("attr1")[1:3], d.get_attribute("ba1")),
            msg=
            "Error in getting data from regression model with multiple submodels."
        )
Example #11
 def test_flush_dataset_correct_flags(self):
     job_set = Dataset(self.job_set_resources, dataset_name="jobs")
     self.assert_(not 'job_id' in job_set.attribute_boxes)
     
     job_set.get_attribute("job_id")
     self.assert_(job_set.attribute_boxes["job_id"].is_in_memory())
     self.assert_(not job_set.attribute_boxes["job_id"].is_cached())
     
     job_set.flush_dataset()
     self.assert_(not job_set.attribute_boxes["job_id"].is_in_memory())
     self.assert_(job_set.attribute_boxes["job_id"].is_cached())
     
     job_set.get_attribute("job_id")
     self.assert_(job_set.attribute_boxes["job_id"].is_in_memory())
     self.assert_(job_set.attribute_boxes["job_id"].is_cached())
Example #12
 def test_aggregate_all_mean(self):
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='zones',
                         table_data={
                             'my_variable': array([4, 8, 10, 1]),
                             'id': array([1, 2, 3, 4]),
                         })
     storage.write_table(table_name='regions',
                         table_data={
                             "id": array([1]),
                         })
     ds = Dataset(in_storage=storage,
                  in_table_name='zones',
                  id_name="id",
                  dataset_name="myzone")
     ds2 = Dataset(in_storage=storage,
                   in_table_name='regions',
                   id_name="id",
                   dataset_name="myregion")
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('myzone', ds)
     dataset_pool._add_dataset('myregion', ds2)
     ds2.compute_variables([
         "myvar = myregion.aggregate_all(myzone.my_variable, function=mean)"
     ],
                           dataset_pool=dataset_pool)
     values = ds2.get_attribute("myvar")
     should_be = array([5.75])
     self.assert_(ma.allclose(values, should_be, rtol=1e-6),
                  "Error in aggregate_all_mean")
Example #13
    def test_change_with_index_and_filter(self):
        """The secondary dataset is restricted by index and filter."""
        data = {
           'my_id': array([1,2,3,4,5,6]),
           'attr':  array([10,20,30,50,46,100]),
           'attr2': array(6*[1])
                }
        data2 = {
            'attr': array([20, 6, 7, 3, 10, 30, 100, 50])
                 }
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='dataset', table_data=data)
        dataset = Dataset(in_storage = storage, 
                           in_table_name='dataset',
                           id_name='my_id'
                           )
        storage.write_table(table_name='dataset2', table_data=data2)
        dataset2 = Dataset(in_storage = storage, 
                           in_table_name='dataset2',
                           id_name='attr'
                           )
        JoinAttributeModificationModel().run(dataset,dataset2, index=array([0,1,2,7]), attribute_to_be_modified='attr2', 
                                             filter='attr > 20')
        self.assertEqual(ma.allequal(dataset.get_attribute('attr2'), array([1, 1, 1, 0, 1, 1])), True)
Example #14
    def test_change_with_index_and_filter(self):
        """The secondary dataset is restricted by index and filter."""
        data = {
            'my_id': array([1, 2, 3, 4, 5, 6]),
            'attr': array([10, 20, 30, 50, 46, 100]),
            'attr2': array(6 * [1])
        }
        data2 = {'attr': array([20, 6, 7, 3, 10, 30, 100, 50])}
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='dataset', table_data=data)
        dataset = Dataset(in_storage=storage,
                          in_table_name='dataset',
                          id_name='my_id')
        storage.write_table(table_name='dataset2', table_data=data2)
        dataset2 = Dataset(in_storage=storage,
                           in_table_name='dataset2',
                           id_name='attr')
        JoinAttributeModificationModel().run(dataset,
                                             dataset2,
                                             index=array([0, 1, 2, 7]),
                                             attribute_to_be_modified='attr2',
                                             filter='attr > 20')
        self.assertEqual(
            ma.allequal(dataset.get_attribute('attr2'),
                        array([1, 1, 1, 0, 1, 1])), True)
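
The restriction chain in this test: index selects rows 0, 1, 2, 7 of dataset2 (attr values 20, 6, 7, 50), the filter 'attr > 20' keeps only 50, and the primary dataset's rows whose attr matches get attr2 zeroed. A plain numpy sketch of that chain reproduces the expected result (illustration only; the value 0 is inferred from the expected output):

    import numpy as np

    attr2_ids = np.array([20, 6, 7, 3, 10, 30, 100, 50])
    selected = attr2_ids[np.array([0, 1, 2, 7])]   # [20, 6, 7, 50]
    selected = selected[selected > 20]             # [50]

    attr = np.array([10, 20, 30, 50, 46, 100])
    attr2 = np.array(6 * [1])
    attr2[np.in1d(attr, selected)] = 0
    assert (attr2 == np.array([1, 1, 1, 0, 1, 1])).all()
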
Example #15
    def test_compute_does_not_unload_from_memory(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='tests',
                            table_data={
                                "a_dependent_variable": array([1, 5, 1000]),
                                "id": array([1, 3, 4])
                            })

        dataset = Dataset(in_storage=storage,
                          in_table_name='tests',
                          id_name="id",
                          dataset_name="tests")

        values = dataset.get_attribute("a_dependent_variable")
        self.assert_(
            "a_dependent_variable" in dataset.get_attributes_in_memory())
        dataset.compute_variables("opus_core.tests.a_test_variable")
        self.assert_(
            "a_dependent_variable" in dataset.get_attributes_in_memory())
        self.assert_("a_test_variable" in dataset.get_attributes_in_memory())
        # The type of values will be int32 on a 32-bit machine, and int64 on a 64 bit machine
        if platform.architecture()[0] == '64bit':
            self.assertEqual(values.dtype.type, int64)
        else:
            self.assertEqual(values.dtype.type, int32)
Example #16
    def test_estimation_with_restricted_submodel_size(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(
            table_name='dataset',
            table_data={
                "id":array([1,2,3,4,5,6,7,8,9,10]),
                "attr1":array([4,7,2,1,5,4,5,3,2,1]),
                "attr2":array([6.8,2.6,0,1,0,4.3,2.1,6,8,7,]),
                "submodel_id": array([1,2,2,1,1,1,2,1,1,1]),
                "outcome": array([0,1,0,1,1,1,0,0,1,1])
                }
            )

        ds = Dataset(in_storage=storage, in_table_name='dataset', id_name="id")
        specification_2subm = EquationSpecification(
                          variables=array(["constant", "attr2", "constant", "attr1"]),
                          coefficients=array(["constant", "ba2", "constant", "ba1"]),
                          submodels = array([1,1,2,2]))

        model = RegressionModel(submodel_string="submodel_id", estimate_config={'submodel_size_max': 5})
        model.estimate(specification_2subm, ds, "outcome", procedure='opus_core.estimate_linear_regression')

        data_attr1 = model.get_data("ba1", 1)
        self.assert_(data_attr1 == None, msg = "Error in getting data from regression model with multiple submodels.")
        data_attr1 = model.get_data("ba2", 1)
        self.assert_(data_attr1.size == 5, msg = "Error in sub-sampling data in regression model.")
        data_attr1 = model.get_data("ba1", 2)
        self.assert_(ma.allequal(ds.get_attribute("attr1")[array([1, 2, 6])], data_attr1),
                     msg = "Error in getting data from regression model with multiple submodels.")
Example #17
 def test_join_datasets_with_2_ids(self):
     from numpy import ma
     storage = StorageFactory().get_storage('dict_storage')
     
     storage.write_table(
         table_name='data1',
         table_data={
             'id1':array([2,4,2]),
             'id2':array([1,2,3]),
             'attr1':array([4,7,1]),
             'attr2':array([100,0,1000]),
             }
         )
     storage.write_table(
         table_name='data2',
         table_data={
             'id1':array([4,2,2]),
             'id2':array([2,3,1]),
             'attr1':array([50,60,70])
             }
         )
     
     ds1 = Dataset(in_storage=storage, in_table_name='data1', id_name=['id1', 'id2'], dataset_name='data1')
     ds2 = Dataset(in_storage=storage, in_table_name='data2', id_name=['id1', 'id2'], dataset_name='data2')
     ds1.join(ds2, 'attr1')
     self.assertEqual(ma.allequal(ds1.get_attribute('attr1'), array([70,50,60])), True)
     self.assertEqual(ma.allequal(ds1.get_attribute('attr2'), array([100,0,1000])), True)
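
With two id columns, join matches on the composite key (id1, id2): ds2 maps (4,2)->50, (2,3)->60, (2,1)->70, so ds1's rows, keyed (2,1), (4,2), (2,3), receive 70, 50, 60. A dictionary-based sketch of that composite-key lookup (semantics only):

    import numpy as np

    keys1 = [(2, 1), (4, 2), (2, 3)]                     # ds1's (id1, id2) pairs
    attr1_by_key = {(4, 2): 50, (2, 3): 60, (2, 1): 70}  # ds2's attr1 by key
    joined = np.array([attr1_by_key[k] for k in keys1])
    assert (joined == np.array([70, 50, 60])).all()
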
Example #18
        def _determine_exogenous_attribute_names(self, resources):
            exogenous_attribute_names = {}

            try:
                storage = copy.deepcopy(resources['in_storage'])
            except: pass
            else:
                exogenous_relationships_dataset = Dataset(
                    in_storage = storage, 
                    in_table_name = 'exogenous_relationships', 
                    id_name = 'exogenous_id'
                    )
                
                try:
                    base_tables = exogenous_relationships_dataset.get_attribute('base_table')
                except:
                    logger.log_warning("An exogenous_relationships table was found, but did not contain the 'base_table' attribute.")
                    return {}
                
                try:
                    exogenous_tables = exogenous_relationships_dataset.get_attribute('exogenous_table')
                except:
                    logger.log_warning("An exogenous_relationships table was found, but did not contain the 'exogenous_table' attribute.")
                    return {}
                
                relationships = zip(base_tables, exogenous_tables)
                
                for base_table, exogenous_table in relationships:
                    if base_table == resources['in_table_name']:
                        exogenous_attributes = self.determine_stored_attribute_names(
                            in_storage = resources['in_storage'], 
                            in_table_name = exogenous_table
                            )
                        for exogenous_attribute in exogenous_attributes:
                            try: 
                                exogenous_attribute_names[exogenous_attribute]
                            except: 
                                exogenous_attribute_names[exogenous_attribute] = exogenous_table
                            else: 
                                raise AttributeError("Duplicate exogenous "
                                    "attribute '%s' found in '%s' and '%s'."     
                                       % (exogenous_attribute, 
                                          exogenous_attribute_names[exogenous_attribute], 
                                          exogenous_table
                                       )
                                   )
            return exogenous_attribute_names
Example #19
    def test_estimation_with_restricted_submodel_size(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='dataset',
                            table_data={
                                "id": array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
                                "attr1": array([4, 7, 2, 1, 5, 4, 5, 3, 2, 1]),
                                "attr2": array([6.8, 2.6, 0, 1, 0, 4.3, 2.1, 6, 8, 7]),
                                "submodel_id": array([1, 2, 2, 1, 1, 1, 2, 1, 1, 1]),
                                "outcome": array([0, 1, 0, 1, 1, 1, 0, 0, 1, 1])
                            })

        ds = Dataset(in_storage=storage, in_table_name='dataset', id_name="id")
        specification_2subm = EquationSpecification(
            variables=array(["constant", "attr2", "constant", "attr1"]),
            coefficients=array(["constant", "ba2", "constant", "ba1"]),
            submodels=array([1, 1, 2, 2]))

        model = RegressionModel(submodel_string="submodel_id",
                                estimate_config={'submodel_size_max': 5})
        model.estimate(specification_2subm,
                       ds,
                       "outcome",
                       procedure='opus_core.estimate_linear_regression')

        data_attr1 = model.get_data("ba1", 1)
        self.assert_(
            data_attr1 == None,
            msg=
            "Error in getting data from regression model with multiple submodels."
        )
        data_attr1 = model.get_data("ba2", 1)
        self.assert_(data_attr1.size == 5,
                     msg="Error in sub-sampling data in regression model.")
        data_attr1 = model.get_data("ba1", 2)
        self.assert_(
            ma.allequal(
                ds.get_attribute("attr1")[array([1, 2, 6])], data_attr1),
            msg=
            "Error in getting data from regression model with multiple submodels."
        )
Example #20
    def test_regression_model_with_constant_variation(self):
        """Estimate the model and run it on the same data as the estimation. The result should be equal to the original data.
        If there is a change in the explanatory variables, the result should not be equal.
        """
        storage = StorageFactory().get_storage('dict_storage')

        table_name = 'dataset_table'
        data = {
            "attr1": array([30, 0, 90, 100, 65, 50]),
            "attr2": array([2002, 1968, 1880, 1921, 1956, 1989]),
            "attr3": array([0.5, 0.1, 0.3, 0.9, 0.2, 0.8]),
            "outcome": array([20, 40, 15, 5, 40, 30], dtype="int32"),
            "id": array([1, 2, 3, 4, 5, 6])
        }
        storage.write_table(table_name=table_name, table_data=data)
        dataset = Dataset(in_storage=storage,
                          in_table_name=table_name,
                          id_name="id")

        specification = EquationSpecification(variables=("attr1", "attr2",
                                                         "attr3", "constant"),
                                              coefficients=("b1", "b2", "b3",
                                                            "constant"))

        model = RegressionModelWithAdditionInitialResiduals(
            outcome_attribute="outcome")
        coef, dummy = model.estimate(
            specification,
            dataset,
            outcome_attribute="outcome",
            procedure="opus_core.estimate_linear_regression")
        result = model.run(specification, coef, dataset)

        # if estimated and run on the same data, it should give the original outcome
        self.assertEqual(ma.allequal(result, data["outcome"]), True)

        # if some values changed, it shouldn't be the same for those elements
        dataset.set_values_of_one_attribute("attr1", array([32, 10]),
                                            arange(2))
        result2 = model.run(specification, coef, dataset)
        self.assertEqual(ma.allequal(result2[0:2], data["outcome"][0:2]),
                         False)
        self.assertEqual(ma.allequal(result2[2:], data["outcome"][2:]), True)

        # check if exclusion of missing values is working
        dataset.set_values_of_one_attribute("outcome", array([0, 0]),
                                            array([2, 4]))
        dataset.delete_one_attribute("_init_error_outcome")
        model.run(specification,
                  coef,
                  dataset,
                  run_config=Configuration(
                      {'exclude_missing_values_from_initial_error': True}))
        initial_error = dataset.get_attribute("_init_error_outcome")
        self.assertEqual(ma.allequal(initial_error[array([2, 4])], 0), True)
        self.assertEqual(ma.allequal(initial_error[array([0, 1, 3, 4, 5])], 0),
                         False)
Example #21
    def create_edges(self, input_file_dir, input_file_name, output_file_name):
        storage = StorageFactory().get_storage(type='tab_storage',
                                               subdir='store',
                                               storage_location=input_file_dir)
        dataset = Dataset(in_storage=storage,
                          id_name=['stop_id', 'sch_time'],
                          in_table_name=input_file_name)

        n = dataset.size()
        trip_ids = dataset.get_attribute("stop_id")
        unique_trip_ids = unique(trip_ids)
        source_list = list()
        target_list = list()
        time_list = list()

        for trip in unique_trip_ids:
            idx = where(dataset.get_attribute("stop_id") == trip)[0]
            nodes = dataset.get_attribute_by_index("node_id", idx)
            times = dataset.get_attribute_by_index("sch_time", idx)
            for inode in range(nodes.size - 1):
                source_list.append(nodes[inode])
                target_list.append(nodes[inode + 1])
                time_list.append(times[inode + 1] - times[inode])

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(
            table_name='edges',
            table_data={
                'edge_id': arange(len(source_list)) + 1,
                'source': array(source_list),  # output field, stored as an array
                'target': array(target_list),  # output field, stored as an array
                'cost': array(time_list, dtype=int32)
            })

        edges = Dataset(in_storage=storage,
                        in_table_name='edges',
                        id_name="edge_id")

        edges.write_dataset(attributes=["source", "target", "cost"],
                            out_storage=storage,
                            out_table_name=output_file_name)
Example #22
    def prepare_for_estimate(self,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             agent_set=None,
                             agents_for_estimation_storage=None,
                             agents_for_estimation_table=None,
                             join_datasets=False,
                             index_to_unplace=None,
                             portion_to_unplace=1.0,
                             agent_filter=None,
                             data_objects={}):
        from opus_core.model import get_specification_for_estimation
        specification = get_specification_for_estimation(
            specification_dict, specification_storage, specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string,
                                            resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace * index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(
                    index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) +
                              " agents.")
            agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                       -1 * ones(end_index_to_unplace.size),
                                       end_index_to_unplace)
        # create agents for estimation
        if agents_for_estimation_storage is not None:
            estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                     in_table_name=agents_for_estimation_table,
                                     id_name=agent_set.get_id_name(),
                                     dataset_name=agent_set.get_dataset_name())
            if agent_filter is not None:
                estimation_set.compute_variables(
                    agent_filter, resources=Resources(data_objects))
                index = where(
                    estimation_set.get_attribute(agent_filter) > 0)[0]
                estimation_set.subset_by_index(
                    index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set,
                                       require_all_attributes=False,
                                       change_ids_if_not_unique=True)
                index = arange(agent_set.size() - estimation_set.size(),
                               agent_set.size())
            else:
                index = agent_set.get_id_index(
                    estimation_set.get_id_attribute())
        else:
            index = arange(agent_set.size())
        return (specification, index)
Example #23
def prepare_for_estimate(specification_dict=None,
                         specification_storage=None,
                         specification_table=None,
                         agent_set=None,
                         household_set=None,
                         agents_for_estimation_storage=None,
                         agents_for_estimation_table=None,
                         households_for_estimation_table=None,
                         join_datasets=False,
                         filter=None,
                         data_objects=None):
    specification = get_specification_for_estimation(specification_dict,
                                                     specification_storage,
                                                     specification_table)
    if agents_for_estimation_storage is not None:
        estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                 in_table_name=agents_for_estimation_table,
                                 id_name=agent_set.get_id_name(),
                                 dataset_name=agent_set.get_dataset_name())
        hh_estimation_set = None
        if households_for_estimation_table is not None:
            hh_estimation_set = Dataset(
                in_storage=agents_for_estimation_storage,
                in_table_name=households_for_estimation_table,
                id_name=household_set.get_id_name(),
                dataset_name=household_set.get_dataset_name())

        filter_index = arange(estimation_set.size())
        if filter:
            estimation_set.compute_variables(filter,
                                             resources=Resources(data_objects))
            filter_index = where(estimation_set.get_attribute(filter) > 0)[0]
            #estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

        if join_datasets:
            if hh_estimation_set is not None:
                household_set.join_by_rows(hh_estimation_set,
                                           require_all_attributes=False,
                                           change_ids_if_not_unique=True)

            agent_set.join_by_rows(estimation_set,
                                   require_all_attributes=False,
                                   change_ids_if_not_unique=True)
            index = arange(agent_set.size() - estimation_set.size(),
                           agent_set.size())[filter_index]
        else:
            index = agent_set.get_id_index(
                estimation_set.get_id_attribute()[filter_index])
    else:
        if agent_set is not None:
            index = arange(agent_set.size())
        else:
            index = None

    return (specification, index)
Example #24
    def test_simple_lag_variable2(self):
        test_data = {
            1000: {"tests": {"id": array([1, 2, 3, 4]), "attr1": array([10, 20, 30, 40])}},
            1001: {"tests": {"id": array([1, 2, 3, 5]), "attr1": array([111, 222, 333, 555])}},
        }
        cache_creator = CreateTestAttributeCache()
        cache_creator.create_attribute_cache_with_data(self._temp_dir, test_data)

        SimulationState().set_current_time(1001)

        attribute_cache = AttributeCache()
        SessionConfiguration(new_instance=True, package_order=["opus_core"], in_storage=attribute_cache)

        ds = Dataset(in_storage=attribute_cache, in_table_name="tests", id_name=["id"], dataset_name="tests")

        ds.compute_variables(["opus_core.tests.attr1"])
        self.assert_(ma.allequal(ds.get_attribute("attr1"), array([111, 222, 333, 555])))

        ds.compute_variables(["opus_core.tests.attr1_lag1"])
        self.assert_(ma.allequal(ds.get_attribute("attr1_lag1"), array([10, 20, 30, 555])))
Example #25
 def test_compute_unloads_from_memory(self):
     
     storage = StorageFactory().get_storage('dict_storage')
     
     storage.write_table(
         table_name='tests',
         table_data={
             'a_dependent_variable':array([1,5,10]),
             'id':array([1,3,4])
             }
         )
     
     dataset = Dataset(in_storage=storage, in_table_name='tests', id_name='id', dataset_name='tests')
     
     SessionConfiguration(in_storage=storage)["flush_variables"] = True
     dataset.get_attribute("a_dependent_variable")
     self.assert_("a_dependent_variable" in dataset.get_attributes_in_memory())
     dataset.compute_variables("opus_core.tests.a_test_variable")
     self.assert_("a_dependent_variable" not in dataset.get_attributes_in_memory())
     self.assert_("a_test_variable" in dataset.get_attributes_in_memory())
     SimulationState().remove_singleton(delete_cache=True)
Example #26
class SimpleModelTest(opus_unittest.OpusTestCase):
    def setUp(self):
        self.data = {
            'id': arange(10)+1,
            'attribute':  array([3000,2800,1000,550,600,1000,2000,500,100,1000])
        }
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name = 'dataset', table_data = self.data)
        self.dataset = Dataset(in_storage=storage, in_table_name='dataset', id_name=['id'])

    def test_simple_model(self):
        m = SimpleModel()
        m.run(self.dataset, 'sqrt(dataset.attribute)', outcome_attribute='sqrtattr')
        self.assertEqual(ma.allclose(self.dataset.get_attribute('sqrtattr'), sqrt(self.data['attribute'])), True)
        self.assertEqual('sqrtattr' in self.dataset.get_primary_attribute_names(), True)
        
    def test_simple_model_without_outcome_attribute(self):
        m = SimpleModel()
        m.run(self.dataset, 'lattr = ln(dataset.attribute)')
        self.assertEqual(ma.allclose(self.dataset.get_attribute('lattr'), log(self.data['attribute'])), True)
        self.assertEqual('lattr' in self.dataset.get_primary_attribute_names(), True)
Example #27
    def test_estimation_without_procedure(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(
            table_name='dataset',
            table_data={
                "id":array([1,2,3,4]),
                "attr1":array([4,7,2,1]),
                "attr2":array([6.8,2.6,0,1]),
                "submodel_id": array([1,2,2,1]),
                "outcome": array([0,1,0,1])
                }
            )

        ds = Dataset(in_storage=storage, in_table_name='dataset', id_name="id")
        specification = EquationSpecification(
                          variables=array(["constant", "attr2", "attr1"]),
                          coefficients=array(["constant", "ba2", "ba1"]))

        model = RegressionModel()
        model.estimate(specification, ds, "outcome")
        data_attr1 = model.get_data("ba1")
        self.assert_(ma.allequal(ds.get_attribute("attr1"), data_attr1),
                     msg = "Error in getting data from regression model")

        specification_2subm = EquationSpecification(
                          variables=array(["constant", "attr2", "constant", "attr1"]),
                          coefficients=array(["constant", "ba2", "constant", "ba1"]),
                          submodels = array([1,1,2,2]))

        model = RegressionModel(submodel_string="submodel_id")
        model.estimate(specification_2subm, ds, "outcome")
        data_attr1 = model.get_data("ba1", 1)
        self.assert_(data_attr1 == None, msg = "Error in getting data from regression model with multiple submodels.")
        data_attr1 = model.get_data("ba1", 2)
        self.assert_(ma.allequal(ds.get_attribute("attr1")[1:3], data_attr1),
                     msg = "Error in getting data from regression model with multiple submodels.")
        d = model.get_data_as_dataset(2)
        self.assert_(ma.allequal(ds.get_attribute("attr1")[1:3], d.get_attribute("ba1")),
                     msg = "Error in getting data from regression model with multiple submodels.")
Example #28
    def test_simple_lag_variable2(self):
        test_data = {
            1000: {
                'tests': {
                    'id': array([1, 2, 3, 4]),
                    'attr1': array([10, 20, 30, 40]),
                },
            },
            1001: {
                'tests': {
                    'id': array([1, 2, 3, 5]),
                    'attr1': array([111, 222, 333, 555]),
                },
            },
        }
        cache_creator = CreateTestAttributeCache()
        cache_creator.create_attribute_cache_with_data(self._temp_dir,
                                                       test_data)

        SimulationState().set_current_time(1001)

        attribute_cache = AttributeCache()
        SessionConfiguration(new_instance=True,
                             package_order=['opus_core'],
                             in_storage=attribute_cache)

        ds = Dataset(in_storage=attribute_cache,
                     in_table_name='tests',
                     id_name=['id'],
                     dataset_name='tests')

        ds.compute_variables(['opus_core.tests.attr1'])
        self.assert_(
            ma.allequal(ds.get_attribute('attr1'), array([111, 222, 333,
                                                          555])))

        ds.compute_variables(['opus_core.tests.attr1_lag1'])
        self.assert_(
            ma.allequal(ds.get_attribute('attr1_lag1'),
                        array([10, 20, 30, 555])))
Example #29
def prepare_for_estimate(specification_dict = None, 
                         specification_storage=None, 
                         specification_table=None,
                         agent_set=None, 
                         household_set=None,
                         agents_for_estimation_storage=None,
                         agents_for_estimation_table=None,
                         households_for_estimation_table=None,
                         join_datasets=False,
                         filter=None,
                         data_objects=None):
    specification = get_specification_for_estimation(specification_dict, 
                                                     specification_storage, 
                                                     specification_table)
    if agents_for_estimation_storage is not None:                 
        estimation_set = Dataset(in_storage = agents_for_estimation_storage, 
                                 in_table_name=agents_for_estimation_table,
                                 id_name=agent_set.get_id_name(), 
                                 dataset_name=agent_set.get_dataset_name())
        hh_estimation_set = None
        if households_for_estimation_table is not None:
            hh_estimation_set = Dataset(in_storage = agents_for_estimation_storage, 
                                     in_table_name=households_for_estimation_table,
                                     id_name=household_set.get_id_name(), 
                                     dataset_name=household_set.get_dataset_name())
        
        filter_index = arange(estimation_set.size())
        if filter:
            estimation_set.compute_variables(filter, resources=Resources(data_objects))
            filter_index = where(estimation_set.get_attribute(filter) > 0)[0]
            #estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)
        
        if join_datasets:
            if hh_estimation_set is not None:
                household_set.join_by_rows(hh_estimation_set, require_all_attributes=False,
                                           change_ids_if_not_unique=True)
                
            agent_set.join_by_rows(estimation_set, require_all_attributes=False,
                                   change_ids_if_not_unique=True)
            index = arange(agent_set.size() - estimation_set.size(), agent_set.size())[filter_index]
        else:
            index = agent_set.get_id_index(estimation_set.get_id_attribute()[filter_index])
    else:
        if agent_set is not None:
            index = arange(agent_set.size())
        else:
            index = None
            
    return (specification, index)
Example #30
    def test_compute_unloads_from_memory(self):

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='tests',
                            table_data={
                                'a_dependent_variable': array([1, 5, 10]),
                                'id': array([1, 3, 4])
                            })

        dataset = Dataset(in_storage=storage,
                          in_table_name='tests',
                          id_name='id',
                          dataset_name='tests')

        SessionConfiguration(in_storage=storage)["flush_variables"] = True
        dataset.get_attribute("a_dependent_variable")
        self.assert_(
            "a_dependent_variable" in dataset.get_attributes_in_memory())
        dataset.compute_variables("opus_core.tests.a_test_variable")
        self.assert_(
            "a_dependent_variable" not in dataset.get_attributes_in_memory())
        self.assert_("a_test_variable" in dataset.get_attributes_in_memory())
        SimulationState().remove_singleton(delete_cache=True)
Example #31
 def test_dict_dataset(self):
     storage = StorageFactory().get_storage('dict_storage')
     
     storage.write_table(
         table_name='dataset',
         table_data={
             "id":array([1,2,3,4]), 
             "attr":array([4,7,2,1])
             }
         )
     
     ds = Dataset(in_storage=storage, in_table_name='dataset', id_name="id")
     
     self.assert_(ds.get_attribute("attr").sum()==14, "Something is wrong with the dataset.")
     self.assert_(ds.size()==4, "Wrong size of dataset.")
Example #32
    def test_change_three_elements(self):
        """3 values are in common - change them to -1. Other attributes stay unchanged."""
        data = {
            'my_id': array([1, 2, 3, 4, 5]),
            'attr': array([10, 2, 3, 50, 2]),
            'attr2': array([4, 3, 2, 5, 3])
        }
        data2 = {'attr': array([2, 6, 7, 3])}
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='dataset', table_data=data)
        dataset = Dataset(in_storage=storage,
                          in_table_name='dataset',
                          id_name='my_id')
        storage.write_table(table_name='dataset2', table_data=data2)
        dataset2 = Dataset(in_storage=storage,
                           in_table_name='dataset2',
                           id_name='attr')
        JoinAttributeModificationModel().run(dataset, dataset2, value=-1)
        self.assertEqual(
            ma.allequal(dataset.get_attribute('attr'),
                        array([10, -1, -1, 50, -1])), True)
        self.assertEqual(
            ma.allequal(dataset.get_attribute('attr2'), data['attr2']), True)
Example #33
 def test_aggregate_all(self):
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(table_name='zones',
         table_data={'my_variable': array([4,8,0.5,1]), 'id': array([1,2,3,4])})
     storage.write_table(table_name='regions',
         table_data={'id': array([1])})
     ds = Dataset(in_storage=storage, in_table_name='zones', id_name="id", dataset_name="myzone")
     ds2 = Dataset(in_storage=storage, in_table_name='regions', id_name="id", dataset_name="myregion")
     dataset_pool = DatasetPool()
     dataset_pool._add_dataset('myzone', ds)
     dataset_pool._add_dataset('myregion', ds2)
     ds2.compute_variables(["myvar = myregion.aggregate_all(myzone.my_variable)"], dataset_pool=dataset_pool)         
     values = ds2.get_attribute("myvar")
     should_be = array([13.5])
     self.assert_(ma.allclose(values, should_be, rtol=1e-6), "Error in aggregate_all")
Example #34
    def test_exogenous_attributes(self):
        base_dataset = Dataset(in_storage=self.storage,
                               in_table_name=self.base_table_name,
                               id_name=self.base_id)
        ExogenousAspectForDataset().apply(base_dataset)

        SimulationState().set_current_time(1980)
        exogenous_attribute1 = base_dataset.get_attribute(
            self.exogenous_attribute1)
        exogenous_attribute2 = base_dataset.get_attribute(
            self.exogenous_attribute2)

        self.assert_(
            ma.allequal(exogenous_attribute1,
                        self.expected_exogenous_attribute_1980_1),
            "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
            % (self.expected_exogenous_attribute_1980_1, exogenous_attribute1))
        self.assert_(
            ma.allequal(exogenous_attribute2,
                        self.expected_exogenous_attribute_1980_2),
            "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
            % (self.expected_exogenous_attribute_1980_2, exogenous_attribute2))

        SimulationState().set_current_time(1981)
        exogenous_attribute1 = base_dataset.get_attribute(
            self.exogenous_attribute1)
        exogenous_attribute2 = base_dataset.get_attribute(
            self.exogenous_attribute2)

        self.assert_(
            ma.allequal(exogenous_attribute1,
                        self.expected_exogenous_attribute_1981_1),
            "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
            % (self.expected_exogenous_attribute_1981_1, exogenous_attribute1))
        self.assert_(
            ma.allequal(exogenous_attribute2,
                        self.expected_exogenous_attribute_1981_2),
            "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
            % (self.expected_exogenous_attribute_1981_2, exogenous_attribute2))

        SimulationState().set_current_time(1982)
        exogenous_attribute1 = base_dataset.get_attribute(
            self.exogenous_attribute1)
        exogenous_attribute2 = base_dataset.get_attribute(
            self.exogenous_attribute2)

        self.assert_(
            ma.allequal(exogenous_attribute1,
                        self.expected_exogenous_attribute_1982_1),
            "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
            % (self.expected_exogenous_attribute_1982_1, exogenous_attribute1))
        self.assert_(
            ma.allequal(exogenous_attribute2,
                        self.expected_exogenous_attribute_1982_2),
            "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
            % (self.expected_exogenous_attribute_1982_2, exogenous_attribute2))
Example #35
 def test_exogenous_attributes(self):
     base_dataset = Dataset(in_storage=self.storage, in_table_name=self.base_table_name, id_name=self.base_id)
     ExogenousAspectForDataset().apply(base_dataset)
     
     SimulationState().set_current_time(1980)
     exogenous_attribute1 = base_dataset.get_attribute(self.exogenous_attribute1)
     exogenous_attribute2 = base_dataset.get_attribute(self.exogenous_attribute2)
     
     self.assert_(ma.allequal(exogenous_attribute1, self.expected_exogenous_attribute_1980_1),
         "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
             % (self.expected_exogenous_attribute_1980_1, exogenous_attribute1))
     self.assert_(ma.allequal(exogenous_attribute2, self.expected_exogenous_attribute_1980_2),
         "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
             % (self.expected_exogenous_attribute_1980_2, exogenous_attribute2))
     
     SimulationState().set_current_time(1981)                
     exogenous_attribute1 = base_dataset.get_attribute(self.exogenous_attribute1)
     exogenous_attribute2 = base_dataset.get_attribute(self.exogenous_attribute2)
     
     self.assert_(ma.allequal(exogenous_attribute1, self.expected_exogenous_attribute_1981_1),
         "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
             % (self.expected_exogenous_attribute_1981_1, exogenous_attribute1))
     self.assert_(ma.allequal(exogenous_attribute2, self.expected_exogenous_attribute_1981_2),
         "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
             % (self.expected_exogenous_attribute_1981_2, exogenous_attribute2))
             
     SimulationState().set_current_time(1982)
     exogenous_attribute1 = base_dataset.get_attribute(self.exogenous_attribute1)
     exogenous_attribute2 = base_dataset.get_attribute(self.exogenous_attribute2)
     
     self.assert_(ma.allequal(exogenous_attribute1, self.expected_exogenous_attribute_1982_1),
         "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
             % (self.expected_exogenous_attribute_1982_1, exogenous_attribute1))
     self.assert_(ma.allequal(exogenous_attribute2, self.expected_exogenous_attribute_1982_2),
         "Exogenous attribute loaded incorrectly. Expected '%s'; received '%s'."
             % (self.expected_exogenous_attribute_1982_2, exogenous_attribute2))
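Note: the pattern exercised by the two test variants above is that an exogenous attribute's values are keyed to the simulation year set through SimulationState. A minimal sketch of that lookup idea, with purely illustrative names and values (this is not the opus_core implementation):

from numpy import array

# Per-year values of one exogenous attribute (illustrative data).
exogenous_values = {
    1980: array([10, 20, 30]),
    1981: array([11, 21, 31]),
    1982: array([12, 22, 32]),
}

def attribute_for_year(year):
    # Return the attribute values recorded for the requested simulation year.
    return exogenous_values[year]

assert (attribute_for_year(1981) == array([11, 21, 31])).all()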
    def test_no_change(self):
        """No common values in the join_attribute, therefore no change."""
        data = {'my_id': array([1, 2, 3, 4]), 'attr': array([10, 20, 30, 50])}
        data2 = {'attr': array([2, 6, 7, 3])}
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='dataset', table_data=data)
        dataset = Dataset(in_storage=storage,
                          in_table_name='dataset',
                          id_name='my_id')
        storage.write_table(table_name='dataset2', table_data=data2)
        dataset2 = Dataset(in_storage=storage,
                           in_table_name='dataset2',
                           id_name='attr')
        JoinAttributeModificationModel().run(dataset, dataset2)
        self.assertEqual(
            ma.allequal(dataset.get_attribute('attr'), data['attr']), True)
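Note: JoinAttributeModificationModel presumably modifies only those rows of the first dataset whose join attribute matches an id of the second dataset; the test data above share no values, so nothing changes. The membership check can be sketched with plain numpy (illustrative, not the model's code):

from numpy import array, in1d

attr = array([10, 20, 30, 50])   # the dataset's join attribute
ids2 = array([2, 6, 7, 3])       # dataset2's id values
common = in1d(attr, ids2)        # which rows have a matching id?
assert not common.any()          # no overlap, hence no modification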
    def test_regression_model_with_constant_variation(self):
        """Estimate the model and run it on the same data as the estimation. The result should be equal to the original data.
        If there is a change in the explanatory variables, the result should not be equal.
        """
        storage = StorageFactory().get_storage('dict_storage')

        table_name = 'dataset_table'
        data = {
                "attr1":array([30, 0, 90, 100, 65, 50]),
                "attr2":array([2002, 1968, 1880, 1921, 1956, 1989]),
                "attr3":array([0.5, 0.1, 0.3, 0.9, 0.2, 0.8]),
                "outcome": array([20, 40, 15, 5, 40, 30], dtype="int32"),
                "id": array([1,2,3,4, 5, 6])
                }
        storage.write_table(
            table_name=table_name,
            table_data=data
            )
        dataset = Dataset(in_storage=storage, in_table_name=table_name, id_name= "id")

        specification = EquationSpecification(variables=(
            "attr1", "attr2", "attr3", "constant"),
            coefficients=("b1", "b2", "b3", "constant"))

        model = RegressionModelWithAdditionInitialResiduals(outcome_attribute = "outcome")
        coef, dummy = model.estimate(specification, dataset, outcome_attribute = "outcome",
                                     procedure = "opus_core.estimate_linear_regression")
        result = model.run(specification, coef, dataset)

        # if estimated and run on the same data, it should give the original outcome
        self.assertEqual(ma.allequal(result, data["outcome"]), True)

        # if some values changed, the result shouldn't match for those elements
        dataset.set_values_of_one_attribute("attr1", array([32, 10]), arange(2))
        result2 = model.run(specification, coef, dataset)
        self.assertEqual(ma.allequal(result2[0:2], data["outcome"][0:2]), False)
        self.assertEqual(ma.allequal(result2[2:], data["outcome"][2:]), True)
        
        # check if exclusion of missing values is working
        dataset.set_values_of_one_attribute("outcome", array([0,0]), array([2,4]))
        dataset.delete_one_attribute("_init_error_outcome")
        model.run(specification, coef, dataset, run_config=Configuration({
                                          'exclude_missing_values_from_initial_error': True}))
        initial_error = dataset.get_attribute("_init_error_outcome")
        self.assertEqual(ma.allequal(initial_error[array([2,4])], 0), True)
        self.assertEqual(ma.allequal(initial_error[array([0,1,3,4,5])], 0), False)
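Note: the behaviour this test relies on is that a regression model "with addition of initial residuals" stores each observation's residual at estimation time and adds it back to the linear prediction at run time, so running on the estimation data reproduces the outcome exactly. A minimal numpy sketch of that idea on the same data (not the opus_core code):

from numpy import array, column_stack, ones
from numpy.linalg import lstsq

X = column_stack((array([30, 0, 90, 100, 65, 50]),
                  array([2002, 1968, 1880, 1921, 1956, 1989]),
                  array([0.5, 0.1, 0.3, 0.9, 0.2, 0.8]),
                  ones(6)))
y = array([20, 40, 15, 5, 40, 30], dtype=float)

beta = lstsq(X, y, rcond=None)[0]      # ordinary least squares fit
init_error = y - X.dot(beta)           # stored at estimation time

prediction = X.dot(beta) + init_error  # run on the same data
assert abs(prediction - y).max() < 1e-8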
Example #38
0
    def prepare_for_estimate(self, specification_dict = None, specification_storage=None,
                              specification_table=None, agent_set=None,
                              agents_for_estimation_storage=None,
                              agents_for_estimation_table=None, join_datasets=False,
                              index_to_unplace=None, portion_to_unplace=1.0,
                              agent_filter=None,
                              data_objects={}):
        from opus_core.models.model import get_specification_for_estimation
        specification = get_specification_for_estimation(specification_dict,
                                                          specification_storage,
                                                          specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string, resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace*index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) + " agents.")
            agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                        -1*ones(end_index_to_unplace.size), end_index_to_unplace)
        # create agents for estimation
        if agents_for_estimation_storage is not None:
            estimation_set = Dataset(in_storage = agents_for_estimation_storage,
                                      in_table_name=agents_for_estimation_table,
                                      id_name=agent_set.get_id_name(), dataset_name=agent_set.get_dataset_name())
            if agent_filter is not None:
                estimation_set.compute_variables(agent_filter, resources=Resources(data_objects))
                index = where(estimation_set.get_attribute(agent_filter) > 0)[0]
                estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set, require_all_attributes=False,
                                    change_ids_if_not_unique=True)
                index = arange(agent_set.size()-estimation_set.size(),agent_set.size())
            else:
                index = agent_set.get_id_index(estimation_set.get_id_attribute())
        else:
            index = arange(agent_set.size())
        return (specification, index)
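Note: the unplacement step above samples a fraction of index_to_unplace without replacement and writes -1 into those agents' location ids. A stand-in for sample_noreplace using numpy.random.choice (names and sizes are illustrative):

from numpy import arange
from numpy.random import choice

index_to_unplace = arange(100)   # candidate agents (illustrative)
portion_to_unplace = 0.25
unplace_size = int(portion_to_unplace * index_to_unplace.size)
# draw unplace_size distinct entries, like sample_noreplace presumably does
end_index_to_unplace = choice(index_to_unplace, size=unplace_size, replace=False)
assert end_index_to_unplace.size == 25
# agent_set.modify_attribute(location_id_name, -1*ones(unplace_size), end_index_to_unplace)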
Example #39
0
    def test_casting(self):
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='tests',
            table_data={
                "a_dependent_variable":array([1,5,1000]),
                "id":array([1,3,4])
                }
            )
        
        dataset = Dataset(in_storage=storage, in_table_name='tests', id_name="id", dataset_name="tests")

        logger.enable_hidden_error_and_warning_words()
        # Next line should cause a 'WARNING' to be logged.
        dataset.compute_variables("opus_core.tests.a_test_variable",
                                  resources=Resources({"check_variables":"*"}))
        logger.disable_hidden_error_and_warning_words()
        
        values = dataset.get_attribute("a_test_variable")
        self.assertEqual(values.dtype.type, int8)
Example #40
0
    def test_casting(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='tests',
                            table_data={
                                "a_dependent_variable": array([1, 5, 1000]),
                                "id": array([1, 3, 4])
                            })

        dataset = Dataset(in_storage=storage,
                          in_table_name='tests',
                          id_name="id",
                          dataset_name="tests")

        logger.enable_hidden_error_and_warning_words()
        # Next line should cause a 'WARNING' to be logged.
        dataset.compute_variables("opus_core.tests.a_test_variable",
                                  resources=Resources({"check_variables":
                                                       "*"}))
        logger.disable_hidden_error_and_warning_words()

        values = dataset.get_attribute("a_test_variable")
        self.assertEqual(values.dtype.type, int8)
Example #41
0
 def test_compute_does_not_unload_from_memory(self):        
     storage = StorageFactory().get_storage('dict_storage')
     
     storage.write_table(
         table_name='tests',
         table_data={
             "a_dependent_variable":array([1,5,1000]),
             "id":array([1,3,4])
             }
         )
     
     dataset = Dataset(in_storage=storage, in_table_name='tests', id_name="id", dataset_name="tests")
     
     values = dataset.get_attribute("a_dependent_variable")
     self.assert_("a_dependent_variable" in dataset.get_attributes_in_memory())
     dataset.compute_variables("opus_core.tests.a_test_variable")
     self.assert_("a_dependent_variable" in dataset.get_attributes_in_memory())
     self.assert_("a_test_variable" in dataset.get_attributes_in_memory())
     # The type of values will be int32 on a 32-bit machine and int64 on a 64-bit machine
     if platform.architecture()[0]=='64bit':
         self.assertEqual(values.dtype.type, int64)
     else:
         self.assertEqual(values.dtype.type, int32)
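Note: the platform check matters because numpy 1.x's default integer is the platform C long; a quick way to see what the current interpreter would produce (64-bit Windows is the known exception, where the C long stays 32 bits wide):

import platform
from numpy import array

# Prints e.g. "64bit int64" on a 64-bit Unix-like build of numpy 1.x.
print(platform.architecture()[0], array([1, 5, 1000]).dtype)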
    def test_no_change(self):
        """No common values in the join_attribute, therefore no change."""
        data = {
           'my_id': array([1,2,3,4]),
           'attr':  array([10,20,30,50])     
                }
        data2 = {
            'attr': array([2,6,7,3])
                 }
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='dataset', table_data=data)
        dataset = Dataset(in_storage = storage, 
                           in_table_name='dataset',
                           id_name='my_id'
                           )
        storage.write_table(table_name='dataset2', table_data=data2)
        dataset2 = Dataset(in_storage = storage, 
                           in_table_name='dataset2',
                           id_name='attr'
                           )
        JoinAttributeModificationModel().run(dataset,dataset2)
        self.assertEqual(ma.allequal(dataset.get_attribute('attr'), data['attr']), True)
Example #43
0
class Test(opus_unittest.OpusTestCase):
    def setUp(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(
            table_name='households',
            table_data={
                'household_id': arange(10) + 1,
                #  'household_id':array([1, 2, 3, 4, 5, 6, 7, 8]),
                #  'income'      :array([1, 3, 2, 1, 3, 8, 5, 4]),
                # #'category_id' :array([1, 2, 2, 1, 2, 3, 3, 2]),
                #  'building_id' :array([1, 2, 4, 3, 3, 2, 4, 2]),
                ##'large_area_id':array([1, 1, 2, 3, 3, 1, 2, 1]),
                #
                'grid_id': arange(-1, 9, 1) + 1,
                'lucky': array([1, 0, 1, 0, 1, 1, 1, 1, 0, 0])
            })

        storage.write_table(
            table_name='gridcells',
            table_data={
                #'building_id':    array([1, 2, 3, 4]),
                #'large_area_id':  array([1, 1, 3, 2]),
                'grid_id':
                arange(15) + 1,
                'filter':
                array([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1]),
                'weight':
                array([0.1, 9, 15, 2, 5, 1, 6, 2.1, .3, 4, 3, 1, 10, 8, 7])
            })
        dataset_pool = SessionConfiguration(
            in_storage=storage).get_dataset_pool()

        #create households
        self.households = Dataset(in_storage=storage,
                                  in_table_name='households',
                                  id_name="household_id",
                                  dataset_name="household")

        # create gridcells
        self.gridcells = Dataset(in_storage=storage,
                                 in_table_name='gridcells',
                                 id_name="grid_id",
                                 dataset_name="gridcell")
        dataset_pool.replace_dataset('household', self.households)
        dataset_pool.replace_dataset('gridcell', self.gridcells)

    def test_1(self):
        """"""
        sample_size = 5
        # check the individual gridcells
        # This is a stochastic model, so it may legitimately fail occasionally.
        index1 = where(self.households.get_attribute("lucky"))[0]
        #index2 = where(self.gridcells.get_attribute("filter"))[0]
        weight = self.gridcells.get_attribute("weight")
        estimation_config = {
            "agent_category_definition": ["household.lucky"],
            "choice_category_definition": ["gridcell.filter+1"]
        }
        for icc in [0, 1]:  #include_chosen_choice?
            #icc = sample([0,1],1)
            sampler_ret = weighted_sampler_by_category().run(
                dataset1=self.households,
                dataset2=self.gridcells,
                index1=index1,
                sample_size=sample_size,
                include_chosen_choice=icc,
                resources=estimation_config)
            # get results
            sampled_index = sampler_ret.get_2d_index()
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))
            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1],
                    UNPLACED_ID)
                chosen_choice_index = resize(array([UNPLACED_ID], dtype=DTYPE),
                                             index1.shape)
                w = where(chosen_choices >= 0)[0]
                # for 64 bit machines, need to coerce the type to int32 -- on a
                # 32 bit machine the astype(int32) doesn't do anything
                chosen_choice_index[w] = sampled_index[
                    w, chosen_choices[w]].astype(int32)
                self.assert_(
                    alltrue(equal(placed_agents_index, chosen_choice_index)))
                sampled_index = sampled_index[:, 1:]

            self.assert_(
                alltrue(
                    lookup(sampled_index.ravel(),
                           arange(self.gridcells.size()),
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
            self.assert_(all(not_equal(weight[sampled_index], 0.0)))
    def prepare_for_estimate(self,
                             add_member_prefix=True,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             building_set=None,
                             buildings_for_estimation_storage=None,
                             buildings_for_estimation_table=None,
                             constants=None,
                             base_year=0,
                             building_categories=None,
                             location_id_variable=None,
                             join_datasets=False,
                             data_objects=None,
                             **kwargs):
        #        buildings = None

        if (building_set is not None):
            if location_id_variable is not None:
                building_set.compute_variables(
                    location_id_variable, resources=Resources(data_objects))

        # create agents for estimation
        if buildings_for_estimation_storage is not None:
            estimation_set = Dataset(
                in_storage=buildings_for_estimation_storage,
                in_table_name=buildings_for_estimation_table,
                id_name=building_set.get_id_name(),
                dataset_name=building_set.get_dataset_name())
            if location_id_variable:
                estimation_set.compute_variables(
                    location_id_variable, resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                estimation_set.add_primary_attribute(
                    estimation_set.get_attribute(location_id_variable),
                    VariableName(location_id_variable).alias())

            years = estimation_set.get_attribute("scheduled_year")
            recent_years = constants['recent_years']
            indicator = zeros(estimation_set.size())
            for year in range(base_year - recent_years, base_year + 1):
                indicator = logical_or(indicator, years == year)
            idx = where(logical_not(indicator))[0]
            estimation_set.remove_elements(idx)

            #if filter:
            #estimation_set.compute_variables(filter, resources=Resources(data_objects))
            #index = where(estimation_set.get_attribute(filter) > 0)[0]
            #estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                building_set.join_by_rows(estimation_set,
                                          require_all_attributes=False,
                                          change_ids_if_not_unique=True)
                index = arange(building_set.size() - estimation_set.size(),
                               building_set.size())
            else:
                index = building_set.get_id_index(
                    estimation_set.get_id_attribute())
        else:
            if building_set is not None:
                index = arange(building_set.size())
            else:
                index = None

        if add_member_prefix:
            specification_table = self.group_member.add_member_prefix_to_table_names(
                [specification_table])

        from opus_core.model import get_specification_for_estimation
        #from urbansim.functions import compute_supply_and_add_to_location_set
        specification = get_specification_for_estimation(
            specification_dict, specification_storage, specification_table)

        #specification, dummy = AgentLocationChoiceModelMember.prepare_for_estimate(self, add_member_prefix,
        #specification_dict, specification_storage,
        #specification_table,
        #location_id_variable=location_id_variable,
        #data_objects=data_objects, **kwargs)
        return (specification, index)
class Test(opus_unittest.OpusTestCase):
    def setUp(self):
        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='households',
                            table_data={
                                'household_id': arange(10) + 1,
                                'grid_id': arange(-1, 9, 1) + 1,
                                'lucky': array([1, 0, 1, 0, 1, 1, 1, 1, 0, 0])
                            })

        storage.write_table(
            table_name='gridcells',
            table_data={
                'grid_id':
                arange(15) + 1,
                'filter':
                array([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1]),
                'weight':
                array([0.1, 9, 15, 2, 5, 1, 6, 2.1, .3, 4, 3, 1, 10, 8, 7])
            })

        #create households
        self.households = Dataset(in_storage=storage,
                                  in_table_name='households',
                                  id_name="household_id",
                                  dataset_name="household")

        # create gridcells
        self.gridcells = Dataset(in_storage=storage,
                                 in_table_name='gridcells',
                                 id_name="grid_id",
                                 dataset_name="gridcell")

    def test_1d_weight_array(self):
        """"""
        sample_size = 5
        # check the individual gridcells
        # This is a stochastic model, so it may legitimately fail occasionally.
        index1 = where(self.households.get_attribute("lucky"))[0]
        index2 = where(self.gridcells.get_attribute("filter"))[0]
        weight = self.gridcells.get_attribute("weight")
        for icc in [0, 1]:  #include_chosen_choice?
            #icc = sample([0,1],1)
            sampler_ret = weighted_sampler().run(dataset1=self.households,
                                                 dataset2=self.gridcells,
                                                 index1=index1,
                                                 index2=index2,
                                                 sample_size=sample_size,
                                                 weight="weight",
                                                 include_chosen_choice=icc)
            # get results
            sampled_index = sampler_ret.get_2d_index()
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            sample_results = sampled_index, chosen_choices
            sampled_index = sample_results[0]
            self.assertEqual(sampled_index.shape, (index1.size, sample_size))
            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1],
                    UNPLACED_ID)
                chosen_choice_index = resize(array([UNPLACED_ID], dtype=DTYPE),
                                             index1.shape)
                w = where(chosen_choices >= 0)[0]
                # for 64 bit machines, need to coerce the type to int32 -- on a
                # 32 bit machine the astype(int32) doesn't do anything
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]]
                self.assert_(
                    alltrue(equal(placed_agents_index, chosen_choice_index)))
                sampled_index = sampled_index[:, 1:]

            self.assert_(
                alltrue(
                    lookup(sampled_index.ravel(),
                           index2,
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))
            self.assert_(all(not_equal(weight[sampled_index], 0.0)))

    def test_2d_weight_array(self):
        #2d weight
        sample_size = 5
        n = self.households.size()
        index1 = where(self.households.get_attribute("lucky"))[0]
        index2 = where(self.gridcells.get_attribute("filter"))[0]
        lucky = self.households.get_attribute("lucky")
        weight = repeat(self.gridcells.get_attribute("weight")[newaxis, :],
                        n,
                        axis=0)
        for i in range(n):
            weight[i, :] += lucky[i]

        for icc in [0, 1]:
            sampler_ret = weighted_sampler().run(dataset1=self.households,
                                                 dataset2=self.gridcells,
                                                 index1=index1,
                                                 index2=index2,
                                                 sample_size=sample_size,
                                                 weight=weight,
                                                 include_chosen_choice=icc)

            sampled_index = sampler_ret.get_2d_index()
            chosen_choices = UNPLACED_ID * ones(index1.size, dtype=DTYPE)
            where_chosen = where(sampler_ret.get_attribute("chosen_choice"))
            chosen_choices[where_chosen[0]] = where_chosen[1]

            self.assertEqual(sampled_index.shape, (index1.size, sample_size))

            if icc:
                placed_agents_index = self.gridcells.try_get_id_index(
                    self.households.get_attribute("grid_id")[index1],
                    UNPLACED_ID)

                chosen_choice_index = resize(array([UNPLACED_ID], dtype=DTYPE),
                                             index1.shape)
                w = where(chosen_choices >= 0)[0]
                chosen_choice_index[w] = sampled_index[w, chosen_choices[w]]
                self.assert_(
                    alltrue(equal(placed_agents_index, chosen_choice_index)))
                sampled_index = sampled_index[:, 1:]

            self.assert_(
                alltrue(
                    lookup(sampled_index.ravel(),
                           index2,
                           index_if_not_found=UNPLACED_ID) != UNPLACED_ID))

            for j in range(sample_size):
                self.assert_(
                    all(not_equal(weight[j, sampled_index[j, :]], 0.0)))
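Note: both tests above treat weighted_sampler as drawing sample_size alternatives per agent with probability proportional to the weights over the filtered gridcells. That behaviour can be approximated with numpy.random.choice (a sketch, not the opus_core sampler):

from numpy import array, where
from numpy.random import choice

filter_arr = array([0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1])
weight = array([0.1, 9, 15, 2, 5, 1, 6, 2.1, .3, 4, 3, 1, 10, 8, 7])
index2 = where(filter_arr)[0]
p = weight[index2] / float(weight[index2].sum())

n_agents, sample_size = 7, 5
sampled_index = array([choice(index2, size=sample_size, replace=False, p=p)
                       for dummy in range(n_agents)])
assert sampled_index.shape == (n_agents, sample_size)
assert (weight[sampled_index] != 0.0).all()
# For the 2d-weight case each agent i would build p from its own row,
# weight[i, index2], instead of the shared 1d weight vector.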
Example #46
0
#Solve for the phase constant c so the sine peaks at the peak year:
#<=> 1 = sin(period * peak + c)
#<=> asin(1) = period * peak + c
#<=> c = asin(1) - period * peak = 1.5708 - 1.1253466 * 2007.75 ~= -2257.8439

econcycle = 67  #months in economic cycle
peak = 2007.75  # peak year (late 2007)

#for each cycle (2pi) x economic cycle years pass.
period = (2 * math.pi) / (econcycle / 12.)

#use to shift the peak relative to the calendar year
phase = math.asin(1) - (period * peak)

rows = []
for unit_name in unique(btclass.get_attribute(
        'grouping_id')):  #needs to be defined in building_type_classification
    for yr in range(2001, 2036):
        if unit_name == 3:  #office; six group levels used here
            amp = 2.5 * .01  #amplitude determines the magnitude of the oscillation above base
            base = .06
        elif unit_name == 2:  #inst
            amp = 3 * .01
            base = .15
        elif unit_name == 1:  #comm
            amp = 3 * .01
            base = .06
        elif unit_name == 5:  #visit
            amp = 10 * .01
            base = .3
        elif unit_name == 4:  #res
            amp = 2 * .01
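Note: the snippet is truncated here (the 'res' branch and the code that appends rows are cut off), but the phase constant can be checked directly: it is chosen so the sinusoid attains its maximum exactly at the peak year, and presumably each (grouping, year) pair then gets a rate of roughly base + amp * sin(period * yr + phase):

import math

econcycle = 67                              # months in economic cycle
peak = 2007.75                              # peak year (late 2007)
period = (2 * math.pi) / (econcycle / 12.)
phase = math.asin(1) - (period * peak)
# sin(period*peak + phase) == sin(asin(1)) == 1, i.e. the cycle peaks in 2007.75
assert abs(math.sin(period * peak + phase) - 1.0) < 1e-9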
    def prepare_for_estimate(
        self,
        add_member_prefix=True,
        specification_dict=None,
        specification_storage=None,
        specification_table=None,
        building_set=None,
        buildings_for_estimation_storage=None,
        buildings_for_estimation_table=None,
        constants=None,
        base_year=0,
        building_categories=None,
        location_id_variable=None,
        join_datasets=False,
        data_objects=None,
        **kwargs
    ):
        #        buildings = None

        if building_set is not None:
            if location_id_variable is not None:
                building_set.compute_variables(location_id_variable, resources=Resources(data_objects))

        # create agents for estimation
        if buildings_for_estimation_storage is not None:
            estimation_set = Dataset(
                in_storage=buildings_for_estimation_storage,
                in_table_name=buildings_for_estimation_table,
                id_name=building_set.get_id_name(),
                dataset_name=building_set.get_dataset_name(),
            )
            if location_id_variable:
                estimation_set.compute_variables(location_id_variable, resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                estimation_set.add_primary_attribute(
                    estimation_set.get_attribute(location_id_variable), VariableName(location_id_variable).alias()
                )

            years = estimation_set.get_attribute("scheduled_year")
            recent_years = constants["recent_years"]
            indicator = zeros(estimation_set.size(), dtype="int32")
            for year in range(base_year - recent_years, base_year + 1):
                indicator = logical_or(indicator, years == year)
            idx = where(logical_not(indicator))[0]
            estimation_set.remove_elements(idx)

            # if filter:
            # estimation_set.compute_variables(filter, resources=Resources(data_objects))
            # index = where(estimation_set.get_attribute(filter) > 0)[0]
            # estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                building_set.join_by_rows(estimation_set, require_all_attributes=False, change_ids_if_not_unique=True)
                index = arange(building_set.size() - estimation_set.size(), building_set.size())
            else:
                index = building_set.get_id_index(estimation_set.get_id_attribute())
        else:
            if building_set is not None:
                index = arange(building_set.size())
            else:
                index = None

        if add_member_prefix:
            specification_table = self.group_member.add_member_prefix_to_table_names([specification_table])

        from opus_core.model import get_specification_for_estimation

        # from urbansim.functions import compute_supply_and_add_to_location_set
        specification = get_specification_for_estimation(specification_dict, specification_storage, specification_table)

        # specification, dummy = AgentLocationChoiceModelMember.prepare_for_estimate(self, add_member_prefix,
        # specification_dict, specification_storage,
        # specification_table,
        # location_id_variable=location_id_variable,
        # data_objects=data_objects, **kwargs)
        return (specification, index)
Example #48
0
class SimpleModelTest(opus_unittest.OpusTestCase):
    def setUp(self):
        self.data = {
            'id':
            arange(10) + 1,
            'attribute':
            array([3000, 2800, 1000, 550, 600, 1000, 2000, 500, 100, 1000]),
            'sqrt_outcome':
            zeros(10)
        }
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name='dataset', table_data=self.data)
        self.dataset = Dataset(in_storage=storage,
                               in_table_name='dataset',
                               id_name=['id'])

    def test_simple_model(self):
        m = SimpleModel()
        m.run(self.dataset,
              'sqrt(dataset.attribute)',
              outcome_attribute='sqrtattr')
        self.assertEqual(
            ma.allclose(self.dataset.get_attribute('sqrtattr'),
                        sqrt(self.data['attribute'])), True)
        self.assertEqual(
            'sqrtattr' in self.dataset.get_primary_attribute_names(), True)

    def test_simple_model_with_filter(self):
        m = SimpleModel()
        m.run(self.dataset,
              'sqrt(dataset.attribute)',
              outcome_attribute='sqrt_outcome',
              dataset_filter='dataset.attribute>1000')
        expected = array([1, 1, 0, 0, 0, 0, 1, 0, 0, 0]) * sqrt(
            self.data['attribute'])
        self.assertEqual(
            ma.allclose(self.dataset.get_attribute('sqrt_outcome'), expected),
            True)
        self.assertEqual(
            'sqrt_outcome' in self.dataset.get_primary_attribute_names(), True)

    def MASKEDtest_simple_model_with_random_filter(self):
        m = SimpleModel()
        m.run(
            self.dataset,
            'sqrt(dataset.attribute)',
            outcome_attribute='sqrt_outcome',
            dataset_filter=
            '(dataset.attribute>=1000) & (random_like(dataset.attribute)<=0.5)',
        )
        con_filter = self.dataset['attribute'] >= 1000
        results = self.dataset['sqrt_outcome'][con_filter]
        expected = sqrt(self.data['attribute'])[con_filter]
        # on average, half of the elements passing the filter should be square-rooted
        self.assertEqual((results == expected).sum(), expected.size / 2)
        self.assertEqual((results != expected).sum(), expected.size / 2)

    def test_simple_model_without_outcome_attribute(self):
        m = SimpleModel()
        m.run(self.dataset, 'lattr = ln(dataset.attribute)')
        self.assertEqual(
            ma.allclose(self.dataset.get_attribute('lattr'),
                        log(self.data['attribute'])), True)
        self.assertEqual('lattr' in self.dataset.get_primary_attribute_names(),
                         True)

    def test_simple_model_with_outcome_values(self):
        m = SimpleModel()
        m.run(self.dataset,
              outcome_attribute='iniattr',
              outcome_values=zeros(10) - 1)
        self.assertEqual(
            ma.allclose(self.dataset.get_attribute('iniattr'),
                        array(10 * [-1])), True)
        self.assertEqual(
            'iniattr' in self.dataset.get_primary_attribute_names(), True)
        # run with filter
        m.run(self.dataset,
              outcome_attribute='iniattr',
              outcome_values=arange(10) + 1,
              dataset_filter='dataset.attribute>1000')
        expected = array([1, 2, -1, -1, -1, -1, 7, -1, -1, -1])
        self.assertEqual(
            ma.allclose(self.dataset.get_attribute('iniattr'), expected), True)
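Note: the filter semantics these tests rely on can be pictured with plain numpy: the expression is evaluated elementwise, but only rows passing dataset_filter receive the new value, while the rest keep the outcome attribute's existing values. A sketch using the arrays from setUp:

from numpy import array, sqrt, where, zeros

attribute = array([3000, 2800, 1000, 550, 600, 1000, 2000, 500, 100, 1000])
outcome = zeros(10)                      # pre-existing 'sqrt_outcome' values
mask = attribute > 1000                  # dataset.attribute>1000
outcome = where(mask, sqrt(attribute), outcome)
expected = array([1, 1, 0, 0, 0, 0, 1, 0, 0, 0]) * sqrt(attribute)
assert (outcome == expected).all()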
Example #49
0
 def test_err_when_asking_for_attribute_that_is_not_in_cache(self):
     job_set = Dataset(self.job_set_resources, dataset_name="jobs")
     job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
     job_set.flush_dataset()
     job_set.get_attribute('job_id')
     self.assertRaises(NameError, job_set.get_attribute, 'attribute_that_does_not_exist')
    def prepare_for_estimate(self, specification_dict = None, specification_storage=None,
                              specification_table=None, agent_set=None, 
                              agents_for_estimation_storage=None,
                              agents_for_estimation_table=None, join_datasets=False,
                              index_to_unplace=None, portion_to_unplace=1.0,
                              compute_lambda=False, grouping_location_set=None,
                              movers_variable=None, movers_index=None,
                              filter=None, location_id_variable=None,
                              data_objects={}):
        """Put 'location_id_variable' always in, if the location id is to be computed on the estimation set,
        i.e. if it is not a primary attribute of the estimation set. Set 'index_to_unplace' to None, if 'compute_lambda' is True.
        In such a case, the annual supply is estimated without unplacing agents. 'grouping_location_set', 'movers_variable' and
        'movers_index' must be given, if 'compute_lambda' is True.
        """
        from opus_core.model import get_specification_for_estimation
        from urbansim.functions import compute_supply_and_add_to_location_set
        specification = get_specification_for_estimation(specification_dict,
                                                          specification_storage,
                                                          specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string, resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace*index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) + " agents.")
            agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                        resize(array([-1]), end_index_to_unplace.size), end_index_to_unplace)
        if compute_lambda:
            movers = zeros(agent_set.size(), dtype="bool8")
            if movers_index is not None:
                movers[movers_index] = 1
            agent_set.add_primary_attribute(movers, "potential_movers")
            self.estimate_config["weights_for_estimation_string"] = self.estimate_config["weights_for_estimation_string"]+"_from_lambda"
            compute_supply_and_add_to_location_set(self.choice_set, grouping_location_set,
                                                   self.run_config["number_of_units_string"],
                                                   self.run_config["capacity_string"],
                                                   movers_variable,
                                                   self.estimate_config["weights_for_estimation_string"],
                                                   resources=Resources(data_objects))

        # create agents for estimation
        if (agents_for_estimation_storage is not None) and (agents_for_estimation_table is not None):
            estimation_set = Dataset(in_storage = agents_for_estimation_storage,
                                      in_table_name=agents_for_estimation_table,
                                      id_name=agent_set.get_id_name(), dataset_name=agent_set.get_dataset_name())
            if location_id_variable is not None:
                estimation_set.compute_variables(location_id_variable, resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                estimation_set.add_primary_attribute(estimation_set.get_attribute(location_id_variable), VariableName(location_id_variable).get_alias())
            if filter:
                values = estimation_set.compute_variables(filter, resources=Resources(data_objects))
                index = where(values > 0)[0]
                estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set, require_all_attributes=False,
                                    change_ids_if_not_unique=True)
                index = arange(agent_set.size()-estimation_set.size(),agent_set.size())
            else:
                index = agent_set.get_id_index(estimation_set.get_id_attribute())
        else:
            if agent_set is not None:
                if filter is not None:
                    values = agent_set.compute_variables(filter, resources=Resources(data_objects))
                    index = where(values > 0)[0]
                else:
                    index = arange(agent_set.size())
            else:
                index = None
        return (specification, index)
    def viewDatasetAction(self):
        #print "viewDatasetAction"
        model = self.model
        table_name = str(model.fileName(self.currentIndex))
        table_name_full = str(model.filePath(self.currentIndex))
        parentIndex = model.parent(self.currentIndex)
        parent_name = str(model.fileName(parentIndex))
        parent_name_full = str(model.filePath(parentIndex))
        storage = StorageFactory().get_storage('flt_storage', storage_location=parent_name_full)
        columns = storage.get_column_names(table_name)
        # temporarily use the table name for the dataset name
        # dataset_name = DatasetFactory().dataset_name_for_table(table_name)
        # Aaron - please check this way of getting the XMLConfiguration -- is this the best way?

#        general = self.mainwindow.toolboxBase.opus_core_xml_configuration.get_section('general')
#        # problem: this gets the package order for the current project, but the viewer shows all the data
#        package_order = general['dataset_pool_configuration'].package_order

        # PREVIOUS HACK:
        # package_order = ['seattle_parcel','urbansim_parcel', 'eugene', 'urbansim', 'opus_core']
        # temporary code: just use a generic dataset for now
        data = Dataset(in_storage=storage, dataset_name=table_name, in_table_name=table_name, id_name=[])
        # code to get a more specialized dataset if possible (doesn't work with table names not ending in 's'
        # unless they are in the exceptions list in DatasetFactory)
        # data = DatasetFactory().search_for_dataset_with_hidden_id(dataset_name, package_order,
        #    arguments={'in_storage': storage, 'in_table_name': table_name})
        # Need to add a new tab to the main tabs for display of the data
        container = QWidget()
        widgetLayout = QVBoxLayout(container)
        summaryGroupBox = QGroupBox(container)
        summaryGroupBox.setTitle(QString("Year: %s  Run name: %s" % (parent_name,table_name_full.split('/')[-3])))
        summaryGroupBox.setFlat(True)
        summaryGroupBoxLayout = QVBoxLayout(summaryGroupBox)
        # Grab the summary data
        buffer = StringIO()
        data.summary(output=buffer)
        strng = buffer.getvalue()
        buffer.close()
        textBrowser = QTextBrowser()
#        textBrowser.insertPlainText(strng)
        textBrowser.insertHtml(self.parse_dataset_summary(strng))
        summaryGroupBoxLayout.addWidget(textBrowser)

        widgetLayout.addWidget(summaryGroupBox)

        tableGroupBox = QGroupBox(container)
        tableGroupBox.setTitle(QString("Table View"))
        tableGroupBox.setFlat(True)
        tableGroupBoxLayout = QVBoxLayout(tableGroupBox)
        tv = QTableView()
        header = columns
        tabledata_tmp = []
        for column in columns:
            tabledata_tmp.append(data.get_attribute(column))

        # Transpose the lists
        tabledata = map(None, *tabledata_tmp)

        # If the table data is not empty then we display it
        if tabledata:
            #tv.resizeColumnsToContents()
            tm = TableModel(tabledata, header, container)
            tv.setModel(tm)
            tv.setSortingEnabled(True)
            tableGroupBoxLayout.addWidget(tv)

        widgetLayout.addWidget(tableGroupBox)

        container.tabIcon = IconLibrary.icon('inspect')
        container.tabLabel = QString(table_name)
        self.manager._attach_tab(container)
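Note: map(None, *tabledata_tmp) is the Python 2 idiom for transposing a list of columns into a list of rows, padding ragged columns with None. Under Python 3 the equivalent would be itertools.zip_longest, roughly:

from itertools import zip_longest  # izip_longest under Python 2

columns = [[1, 2, 3], ['a', 'b', 'c']]
rows = list(zip_longest(*columns, fillvalue=None))
assert rows == [(1, 'a'), (2, 'b'), (3, 'c')]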
Example #52
0
import os
import urbansim
us_path = urbansim.__path__[0]
from opus_core.storage_factory import StorageFactory
storage = StorageFactory().get_storage('tab_storage',
    storage_location = os.path.join(us_path, "data/tutorial"))

from opus_core.datasets.dataset import Dataset
households = Dataset(in_storage = storage,
                         in_table_name = 'households', 
                         id_name='household_id',
                         dataset_name='household')
households.get_attribute_names()
households.get_id_attribute()
households.size()
households.get_attribute("income")
households.get_attribute_names()
households.load_dataset()
households.get_attribute_names()
#households.plot_histogram("income", bins = 10)
#households.r_histogram("income")
#households.r_scatter("persons", "income")
households.correlation_coefficient("persons", "income")
households.correlation_matrix(["persons", "income"])
households.summary()
households.add_primary_attribute(data=[4,6,9,2,4,8,2,1,3,2], name="location")
households.get_attribute_names()
households.modify_attribute(name="location", data=[0,0], index=[0,1])
households.get_attribute("location")
households.get_data_element_by_id(5).location
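Note: correlation_coefficient and correlation_matrix presumably compute Pearson correlations over the named attributes, which can be cross-checked with numpy.corrcoef; the arrays below are illustrative stand-ins for the tutorial data:

from numpy import array, corrcoef

persons = array([2, 3, 1, 4, 2, 5, 1, 2, 3, 2])
income = array([40, 55, 20, 80, 45, 90, 15, 30, 60, 35])
r = corrcoef(persons, income)[0, 1]   # Pearson correlation coefficient
print(r)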
    def viewDatasetAction(self):
        #print "viewDatasetAction"
        model = self.model
        table_name = str(model.fileName(self.currentIndex))
        table_name_full = str(model.filePath(self.currentIndex))
        parentIndex = model.parent(self.currentIndex)
        parent_name = str(model.fileName(parentIndex))
        parent_name_full = str(model.filePath(parentIndex))
        storage = StorageFactory().get_storage(
            'flt_storage', storage_location=parent_name_full)
        columns = storage.get_column_names(table_name)
        # temporarily use the table name for the dataset name
        # dataset_name = DatasetFactory().dataset_name_for_table(table_name)
        # Aaron - please check this way of getting the XMLConfiguration -- is this the best way?

        #        general = self.mainwindow.toolboxBase.opus_core_xml_configuration.get_section('general')
        #        # problem: this gets the package order for the current project, but the viewer shows all the data
        #        package_order = general['dataset_pool_configuration'].package_order

        # PREVIOUS HACK:
        # package_order = ['seattle_parcel','urbansim_parcel', 'eugene', 'urbansim', 'opus_core']
        # temporary code: just use a generic dataset for now
        data = Dataset(in_storage=storage,
                       dataset_name=table_name,
                       in_table_name=table_name,
                       id_name=[])
        # code to get a more specialized dataset if possible (doesn't work with table names not ending in 's'
        # unless they are in the exceptions list in DatasetFactory)
        # data = DatasetFactory().search_for_dataset_with_hidden_id(dataset_name, package_order,
        #    arguments={'in_storage': storage, 'in_table_name': table_name})
        # Need to add a new tab to the main tabs for display of the data
        container = QWidget()
        widgetLayout = QVBoxLayout(container)
        summaryGroupBox = QGroupBox(container)
        summaryGroupBox.setTitle(
            QString("Year: %s  Run name: %s" %
                    (parent_name, table_name_full.split('/')[-3])))
        summaryGroupBox.setFlat(True)
        summaryGroupBoxLayout = QVBoxLayout(summaryGroupBox)
        # Grab the summary data
        buffer = StringIO()
        data.summary(output=buffer, unload_after_each_attribute=True)
        strng = buffer.getvalue()
        buffer.close()
        textBrowser = QTextBrowser()
        #        textBrowser.insertPlainText(strng)
        textBrowser.insertHtml(self.parse_dataset_summary(strng))
        summaryGroupBoxLayout.addWidget(textBrowser)

        widgetLayout.addWidget(summaryGroupBox)

        tableGroupBox = QGroupBox(container)
        tableGroupBox.setTitle(QString("Table View"))
        tableGroupBox.setFlat(True)
        tableGroupBoxLayout = QVBoxLayout(tableGroupBox)
        tv = QTableView()
        header = columns
        tabledata_tmp = []
        for column in columns:
            tabledata_tmp.append(data.get_attribute(column))

        # Transpose the lists
        tabledata = map(None, *tabledata_tmp)

        # If the table data is not empty then we display it
        if tabledata:
            #tv.resizeColumnsToContents()
            tm = TableModel(tabledata, header, container)
            tv.setModel(tm)
            tv.setSortingEnabled(True)
            tableGroupBoxLayout.addWidget(tv)

        widgetLayout.addWidget(tableGroupBox)

        container.tabIcon = IconLibrary.icon('inspect')
        container.tabLabel = QString(table_name)
        self.manager._attach_tab(container)