Example #1
    def cache_database_table(self, table_name, base_year, database, in_storage, config):
        """Copy this table from input database into attribute cache.
        """
        logger.start_block('Caching table %s' % table_name)
        try:
            #TODO: why is the config being modified? Seems like it's kind of useless here...
            config['storage_location'] = os.path.join(config['cache_directory'], str(base_year), table_name)
            
            if not os.path.exists(config['storage_location']):
                flt_storage = StorageFactory().get_storage(
                    type='flt_storage',
                    subdir='store',
                    storage_location=config['storage_location'])
                
                table = database.get_table(table_name)
                
                id_name = [primary_key.name.lower() for primary_key in table.primary_key]

                dataset = Dataset(resources=config,
                                  in_storage=in_storage,
                                  out_storage=flt_storage,
                                  in_table_name=table_name,
                                  id_name=id_name)

                nchunks = config['creating_baseyear_cache_configuration'].tables_to_cache_nchunks.get(table_name, 1)
                current_time = SimulationState().get_current_time()
                SimulationState().set_current_time(base_year)
                dataset.load_dataset(nchunks=nchunks, flush_after_each_chunk=True)
                SimulationState().set_current_time(current_time)
            else:
                logger.log_status(config['storage_location'] + " already exists; skipping caching of " + table_name)
            
        finally:
            logger.end_block()
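
A minimal sketch of how this method might be driven. Everything here is
illustrative: the cacher object, the open database, the in_storage object, and
the concrete config values are assumptions, not part of the snippet above.

# Hypothetical driver for cache_database_table; all names below are placeholders.
config = {
    'cache_directory': '/tmp/urbansim_cache',
    'creating_baseyear_cache_configuration': baseyear_config,  # assumed config object
}
for table_name in ['households', 'jobs', 'gridcells']:
    cacher.cache_database_table(table_name, base_year=2000,
                                database=database, in_storage=in_storage,
                                config=config)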
Example #2
    def __init__(self, resources=None, in_storage=None, out_storage=None,
                 in_table_name=None, attributes=None,
                 out_table_name=None, id_name=None,
                 nchunks=None, other_in_table_names=None,
                 debuglevel=0):
        debug = DebugPrinter(debuglevel)
        debug.print_debug("Creating DevelopmentGroupDataset object.", 2)
        resources = ResourceCreatorDevelopmentGroups().get_resources_for_dataset(
            resources=resources,
            in_storage=in_storage,
            out_storage=out_storage,
            in_table_name=in_table_name,
            out_table_name=out_table_name,
            attributes=attributes,
            id_name=id_name,
            id_name_default=self.id_name_default,
            nchunks=nchunks,
            debug=debug
            )

        Dataset.__init__(self, resources=resources)

        if isinstance(other_in_table_names, list):
            for place_name in other_in_table_names:  # load other tables
                ds = Dataset(resources=resources)
                ds.load_dataset(in_table_name=place_name)
                self.connect_datasets(ds)
Example #3
from numpy import arange, where

from opus_core.datasets.dataset import Dataset

def get_households_for_estimation(agent_set, in_storage,
                                  agents_for_estimation_table_name,
                                  exclude_condition=None,
                                  join_datasets=True):
    estimation_set = Dataset(in_storage = in_storage,
                             in_table_name=agents_for_estimation_table_name,
                             id_name=agent_set.get_id_name(), 
                             dataset_name=agent_set.get_dataset_name())
    agent_set.unload_primary_attributes()
    agent_set.load_dataset(attributes='*')
    estimation_set.load_dataset(attributes=agent_set.get_primary_attribute_names())
    if join_datasets:
        agent_set.join_by_rows(estimation_set, 
                               require_all_attributes=False,
                               change_ids_if_not_unique=True)
        index = arange(agent_set.size()-estimation_set.size(),agent_set.size())
    else:
        index = agent_set.get_id_index(estimation_set.get_id_attribute())

    exclude_ids = []
    if exclude_condition is not None:
        # ids of agents matching the exclusion expression
        exclude_ids = agent_set.get_id_attribute()[where(agent_set.compute_variables(exclude_condition))]

    for agent_id in exclude_ids:
        minus = agent_set.get_id_index(agent_id)
        if minus in index:
            index = index[index != minus]

    return (agent_set, index)
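
A hedged usage sketch for the function above: given an agent set of households
and a storage holding the estimation table, it returns the (possibly extended)
agent set together with the index of the estimation rows. The table name and
the exclusion expression below are placeholders.

# Placeholders: 'households' and 'storage' are assumed to exist already.
households, estimation_index = get_households_for_estimation(
    agent_set=households,
    in_storage=storage,
    agents_for_estimation_table_name='households_for_estimation',
    exclude_condition='household.income < 0')  # any boolean expression over the dataset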
Example #4
    def __init__(self,
            resources=None,
            in_storage=None,
            out_storage=None,
            in_table_name=None,
            out_table_name=None,
            attributes=None,
            id_name=None,
            nchunks=None,
            other_in_table_names=None,
            debuglevel=0
            ):
        debug = DebugPrinter(debuglevel)
        debug.print_debug("Creating EmploymentSectorGroupDataset object.", 2)
        resources = ResourceCreatorEmploymentSectorGroups().get_resources_for_dataset(
            resources=resources,
            in_storage=in_storage,
            out_storage=out_storage,
            in_table_name=in_table_name,
            out_table_name=out_table_name,
            attributes=attributes,
            id_name=id_name,
            id_name_default=self.id_name_default,
            nchunks=nchunks,
            debug=debug,
            )

        Dataset.__init__(self, resources=resources)

        if isinstance(other_in_table_names, list):
            for place_name in other_in_table_names:  # load other tables
                ds = Dataset(resources=resources)
                ds.load_dataset(in_table_name=place_name)
                self.connect_datasets(ds)
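
Both group-dataset constructors above follow the same pattern: merge the keyword
arguments into a resources dictionary, initialize the base Dataset, and attach
any extra tables via connect_datasets. A hypothetical construction call, with
the storage object and table names assumed for illustration:

# Illustrative only; 'storage' and both table names are assumptions.
groups = EmploymentSectorGroupDataset(
    in_storage=storage,
    in_table_name='employment_sectors',
    other_in_table_names=['employment_sector_groups'],
    debuglevel=1)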
Example #5
    def flush_dataset(self):
        Dataset.flush_dataset(self)
        group_dataset = Dataset(in_storage=self.resources["in_storage"],
                                in_table_name=self.resources["in_table_name_groups"],
                                id_name=[self.get_id_name()[0], self.group_id_name])
        group_dataset.load_dataset()
        group_dataset.flush_dataset()
Example #6
    def _write_data_to_year(self, data, cache_dir, year):
        """Writes this data to this year of the cache.  Returns dataset.
        """
        # Import here, inside the unit test, so that a circular dependency is avoided.
        from opus_core.datasets.dataset import Dataset

        SimulationState().set_cache_directory(cache_dir)

        storage = dict_storage()
        storage.write_table(table_name=self._table_name, table_data=data)

        ds = Dataset(id_name=self._id_name, in_storage=storage, in_table_name=self._table_name)
        ds.load_dataset()
        self._write_dataset_to_cache(ds, cache_dir, year)
        return ds  # return the dataset, as the docstring promises
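
The snippet above hinges on the dict_storage round trip: write an in-memory
table, then load it back through Dataset. The same round trip in isolation, as
a self-contained sketch (table name and values invented for illustration):

from numpy import array
from opus_core.storage_factory import StorageFactory
from opus_core.datasets.dataset import Dataset

# In-memory storage; 'my_table' and its values are made up for this sketch.
storage = StorageFactory().get_storage('dict_storage')
storage.write_table(
    table_name='my_table',
    table_data={'my_id': array([1, 2, 3]), 'attr': array([10, 20, 30])})

ds = Dataset(id_name='my_id', in_storage=storage, in_table_name='my_table')
ds.load_dataset()
ds.size()  # -> 3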
Example #7
    def __init__(self,
            resources=None,
            other_in_table_names=None,
            use_groups=True,
            ):

        Dataset.__init__(self, resources=resources)

        if isinstance(other_in_table_names, list):
            for place_name in other_in_table_names:  # load other tables
                ds = Dataset(resources=resources)
                ds.load_dataset(in_table_name=place_name)
                self.connect_datasets(ds)

        if use_groups:
            self.groups = self._get_groups()
Example #8
import os
import urbansim

us_path = urbansim.__path__[0]
from opus_core.storage_factory import StorageFactory
storage = StorageFactory().get_storage('tab_storage',
    storage_location=os.path.join(us_path, "data/tutorial"))

from opus_core.datasets.dataset import Dataset
households = Dataset(in_storage=storage,
                     in_table_name='households',
                     id_name='household_id',
                     dataset_name='household')
households.get_attribute_names()    # attributes known before any data is loaded
households.get_id_attribute()
households.size()
households.get_attribute("income")  # loads the 'income' attribute on demand
households.get_attribute_names()
households.load_dataset()           # loads the remaining primary attributes
households.get_attribute_names()
#households.plot_histogram("income", bins = 10)
#households.r_histogram("income")
#households.r_scatter("persons", "income")
households.correlation_coefficient("persons", "income")
households.correlation_matrix(["persons", "income"])
households.summary()
households.add_primary_attribute(data=[4,6,9,2,4,8,2,1,3,2], name="location")
households.get_attribute_names()
households.modify_attribute(name="location", data=[0,0], index=[0,1])
households.get_attribute("location")
households.get_data_element_by_id(5).location

#households.write_dataset(out_storage=storage, out_table_name="households_output")
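
To persist the modified table, the commented-out write_dataset call above can
be uncommented, assuming the storage is writable; reading the result back is
symmetric. The output table name is just an example.

households.write_dataset(out_storage=storage, out_table_name="households_output")
reloaded = Dataset(in_storage=storage,
                   in_table_name='households_output',
                   id_name='household_id',
                   dataset_name='household')
reloaded.load_dataset()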
Example #9
        def _proxy_for_get_attribute(name):
            try:
                return native_get_attribute(name)
                
            except NameError:
                if not isinstance(name, VariableName):
                    name = VariableName(name)
                short_name = name.get_alias()

                current_year = SimulationState().get_current_time()
                
                if short_name in dataset.exogenous_attribute_names:
                    exogenous_table_name = dataset.exogenous_attribute_names[short_name]
                    
                    temporary_dataset = Dataset(in_storage=dataset.resources['in_storage'], 
                        in_table_name=exogenous_table_name, id_name='id')
                        
                    if ('year' not in dataset.get_attribute_names() or
                            not self.attribute_boxes['year'].is_in_memory()):
                        # Load the data into a temporary dataset because we
                        # don't want dataset to save the values we retrieve,
                        # since then we can't filter them by year.
                        temporary_dataset.load_dataset(
                            nchunks=1,
                            attributes=[short_name, 'year', 'base_table_id'],
                            in_table_name=exogenous_table_name
                            )
                                    
                else:
                    raise # re-raise NameError
                                
                exogenous_data = temporary_dataset.attribute_boxes[short_name].get_data()
                year_data = temporary_dataset.attribute_boxes['year'].get_data()
                base_table_id_data = temporary_dataset.attribute_boxes['base_table_id'].get_data()

                # Materialize the zip: it is iterated twice below, and
                # Python 3's zip() is a one-shot iterator.
                exogenous_table_data = list(zip(exogenous_data, year_data, base_table_id_data))

                exogenous_attribute_values = [_attribute
                    for _attribute, _year, _base_table_id in exogenous_table_data
                    if _year == current_year]
                    
                exogenous_base_table_ids = [_base_table_id
                    for _attribute, _year, _base_table_id in exogenous_table_data
                    if _year == current_year]

                base_table_ids = native_get_attribute(dataset.resources['id_name'])

                exogenous_attributes_by_base_table_id = {}
                for base_table_id, value in zip(exogenous_base_table_ids, exogenous_attribute_values):
                    if base_table_id in exogenous_attributes_by_base_table_id:
                        raise AttributeError("Duplicate data for base_table_id "
                            "'%s', year %s."
                                % (base_table_id, current_year))
                    exogenous_attributes_by_base_table_id[base_table_id] = value

                result = [None] * len(base_table_ids)
                for index in range(len(base_table_ids)):
                    try:
                        result[index] = exogenous_attributes_by_base_table_id[base_table_ids[index]]
                    except KeyError:
                        raise AttributeError("Missing exogenous data for "
                            "base_table_id '%s', year %s." 
                                % (base_table_ids[index], current_year))

                return result
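
At its core, the proxy performs a year filter followed by a one-to-one mapping
from base_table_id to value. The same logic in isolation, with toy data invented
for clarity:

# Toy rows standing in for the exogenous table: (value, year, base_table_id).
current_year = 2001
exogenous_table_data = [(100, 2000, 1), (110, 2001, 1), (220, 2001, 2)]

values_by_id = {}
for value, year, base_table_id in exogenous_table_data:
    if year != current_year:
        continue  # keep only this year's rows
    if base_table_id in values_by_id:
        raise AttributeError("Duplicate data for base_table_id '%s', year %s."
                             % (base_table_id, current_year))
    values_by_id[base_table_id] = value

base_table_ids = [1, 2]  # ids of the rows in the base dataset
result = []
for i in base_table_ids:
    if i not in values_by_id:
        raise AttributeError("Missing exogenous data for base_table_id '%s', year %s."
                             % (i, current_year))
    result.append(values_by_id[i])
# result == [110, 220]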