    def prepare_for_estimate(self,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             events_for_estimation_storage=None,
                             events_for_estimation_table=None,
                             agents_filter='',
                             compute_variables=[],
                             data_objects={}):

        specification = get_specification_for_estimation(specification_dict,
                                                          specification_storage,
                                                          specification_table)
        projects = None
        # create agents for estimation
        if events_for_estimation_storage is not None:
            projects = Dataset(in_storage = events_for_estimation_storage,
                               in_table_name= events_for_estimation_table,
                               id_name=[], dataset_name='development_project'
                               )
            if compute_variables:
                projects.compute_variables(compute_variables, resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                #projects.add_primary_attribute(estimation_set.get_attribute(location_id_variable), 
                #                               VariableName(location_id_variable).get_alias())
            
            if agents_filter:
                values = projects.compute_variables(agents_filter, resources=Resources(data_objects))
                index = where(values > 0)[0]
                projects.subset_by_index(index, flush_attributes_if_not_loaded=False)
                
        return (specification, projects)
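The agents_filter branch above keeps only the projects whose computed filter value is positive. A self-contained numpy sketch of that selection pattern, with plain arrays standing in for the Opus dataset (an assumption made purely for illustration):

from numpy import array, where

# Toy stand-in for what projects.compute_variables(agents_filter, ...) returns:
# one value per development project.
filter_values = array([0, 3, 0, 1, 2])

# Same rule as the source: keep rows where the filter value is positive.
index = where(filter_values > 0)[0]          # -> array([1, 3, 4])

# subset_by_index() then restricts the dataset to these rows; with plain
# arrays the equivalent is fancy indexing.
project_ids = array([101, 102, 103, 104, 105])
print(project_ids[index])                    # -> [102 104 105]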
Example #2
    def prepare_for_estimate(self,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             events_for_estimation_storage=None,
                             events_for_estimation_table=None,
                             agents_filter='',
                             compute_variables=[],
                             data_objects={}):

        specification = get_specification_for_estimation(
            specification_dict, specification_storage, specification_table)
        projects = None
        # create agents for estimation
        if events_for_estimation_storage is not None:
            projects = Dataset(in_storage=events_for_estimation_storage,
                               in_table_name=events_for_estimation_table,
                               id_name=[],
                               dataset_name='development_project')
            if compute_variables:
                projects.compute_variables(compute_variables,
                                           resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                #projects.add_primary_attribute(estimation_set.get_attribute(location_id_variable),
                #                               VariableName(location_id_variable).get_alias())

            if agents_filter:
                values = projects.compute_variables(
                    agents_filter, resources=Resources(data_objects))
                index = where(values > 0)[0]
                projects.subset_by_index(index,
                                         flush_attributes_if_not_loaded=False)

        return (specification, projects)
Example #3
    def prepare_for_estimate(self,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             agent_set=None,
                             agents_for_estimation_storage=None,
                             agents_for_estimation_table=None,
                             join_datasets=False,
                             index_to_unplace=None,
                             portion_to_unplace=1.0,
                             agent_filter=None,
                             data_objects={}):
        from opus_core.model import get_specification_for_estimation
        specification = get_specification_for_estimation(
            specification_dict, specification_storage, specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string,
                                            resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace * index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(
                    index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) +
                              " agents.")
            agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                       -1 * ones(end_index_to_unplace.size),
                                       end_index_to_unplace)
        # create agents for estimation
        if agents_for_estimation_storage is not None:
            estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                     in_table_name=agents_for_estimation_table,
                                     id_name=agent_set.get_id_name(),
                                     dataset_name=agent_set.get_dataset_name())
            if agent_filter is not None:
                estimation_set.compute_variables(
                    agent_filter, resources=Resources(data_objects))
                index = where(
                    estimation_set.get_attribute(agent_filter) > 0)[0]
                estimation_set.subset_by_index(
                    index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set,
                                       require_all_attributes=False,
                                       change_ids_if_not_unique=True)
                index = arange(agent_set.size() - estimation_set.size(),
                               agent_set.size())
            else:
                index = agent_set.get_id_index(
                    estimation_set.get_id_attribute())
        else:
            index = arange(agent_set.size())
        return (specification, index)
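At the end of this variant, the index of the estimation records within agent_set is obtained either positionally (when join_by_rows has just appended them) or via id lookup. A small numpy illustration of the positional branch; the sizes are made-up values for illustration:

from numpy import arange

agent_set_size = 10       # assumed size of agent_set after join_by_rows
estimation_set_size = 3   # assumed number of appended estimation records

# With join_datasets=True the estimation records occupy the last
# estimation_set_size positions of agent_set, so their index is simply:
index = arange(agent_set_size - estimation_set_size, agent_set_size)
print(index)              # -> [7 8 9]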
Example #4
    def prepare_for_estimate(self, specification_dict=None,
                             specification_storage=None, 
                             specification_table=None,
                             agent_set=None, 
                             agents_for_estimation_storage=None,
                             agents_for_estimation_table=None,
                             filter_for_estimation_set=None,
                             data_objects=None):
        specification = get_specification_for_estimation(specification_dict, 
                                                         specification_storage, 
                                                         specification_table)
        if self.filter is not None:
            agents_index = where(self.proposal_set.compute_variables(self.filter))[0]
        else:
            # ensure agents_index is defined when no filter is given
            agents_index = arange(self.proposal_set.size())

        id_attribute_name = ['parcel_id', 'template_id', 'is_redevelopment']
        if agents_for_estimation_storage is not None:
            estimation_set = Dataset(in_storage = agents_for_estimation_storage, 
                                     in_table_name=agents_for_estimation_table,
                                     id_name=id_attribute_name, 
                                     dataset_name=agent_set.get_dataset_name())
            
            filter_index = arange(estimation_set.size())
            if filter_for_estimation_set:
                filter_index = where(estimation_set.compute_variables(filter_for_estimation_set, resources=Resources(data_objects)))[0]
                estimation_set.subset_by_index(filter_index, flush_attributes_if_not_loaded=False)
                
            id_attributes = None
            for attr_name in id_attribute_name:
                attr_value = agent_set.get_attribute_as_column(attr_name)
                if id_attributes is None:
                    id_attributes = attr_value
                else:
                    id_attributes = concatenate((id_attributes, attr_value), axis=1)
                    
            id_index = estimation_set.try_get_id_index(id_attributes, return_value_if_not_found=-1)

            status_id = 2 * ones(agent_set.size(), dtype="int8")
            status_id[where(id_index != -1)] = 1
            name = self.choice_attribute_name.get_alias()
            if name in agent_set.get_known_attribute_names():
                agent_set.set_values_of_one_attribute(name, status_id[where(id_index != -1)], where(id_index!=-1)[0])
            else:
                agent_set.add_primary_attribute(status_id, name)
            
        return (specification, agents_index)
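This variant matches agents against the estimation set on a composite id (parcel_id, template_id, is_redevelopment). The loop builds a two-dimensional id array one column at a time; a self-contained numpy sketch of the same construction with made-up values:

from numpy import array, concatenate

# Toy per-agent id columns; get_attribute_as_column() yields one (n, 1) column per attribute.
parcel_id = array([[1], [2], [3]])
template_id = array([[10], [20], [30]])
is_redevelopment = array([[0], [1], [0]])

# Same accumulation as the source loop: stack the columns side by side.
id_attributes = None
for attr_value in (parcel_id, template_id, is_redevelopment):
    if id_attributes is None:
        id_attributes = attr_value
    else:
        id_attributes = concatenate((id_attributes, attr_value), axis=1)

print(id_attributes)
# [[ 1 10  0]
#  [ 2 20  1]
#  [ 3 30  0]]
# try_get_id_index() then looks each row up in the estimation set and
# returns -1 wherever no matching composite id exists.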
Example #5
    def prepare_for_estimate(self, specification_dict = None, specification_storage=None,
                              specification_table=None, agent_set=None,
                              agents_for_estimation_storage=None,
                              agents_for_estimation_table=None, join_datasets=False,
                              index_to_unplace=None, portion_to_unplace=1.0,
                              agent_filter=None,
                              data_objects={}):
        from opus_core.models.model import get_specification_for_estimation
        specification = get_specification_for_estimation(specification_dict,
                                                          specification_storage,
                                                          specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string, resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace*index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) + " agents.")
            agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                        -1*ones(end_index_to_unplace.size), end_index_to_unplace)
        # create agents for estimation
        if agents_for_estimation_storage is not None:
            estimation_set = Dataset(in_storage = agents_for_estimation_storage,
                                      in_table_name=agents_for_estimation_table,
                                      id_name=agent_set.get_id_name(), dataset_name=agent_set.get_dataset_name())
            if agent_filter is not None:
                estimation_set.compute_variables(agent_filter, resources=Resources(data_objects))
                index = where(estimation_set.get_attribute(agent_filter) > 0)[0]
                estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set, require_all_attributes=False,
                                    change_ids_if_not_unique=True)
                index = arange(agent_set.size()-estimation_set.size(),agent_set.size())
            else:
                index = agent_set.get_id_index(estimation_set.get_id_attribute())
        else:
            index = arange(agent_set.size())
        return (specification, index)
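In the non-join branch the estimation records are located in agent_set by id rather than by position. A self-contained sketch of that lookup; the dict-based mapping stands in for get_id_index and is only an illustrative assumption:

from numpy import array

agent_ids = array([11, 12, 13, 14, 15])   # ids stored in agent_set
estimation_ids = array([13, 11])          # ids of the estimation records

# get_id_index() maps each id to its position in agent_set; a dict lookup
# reproduces that behaviour for this toy case.
position_of = {agent_id: pos for pos, agent_id in enumerate(agent_ids)}
index = array([position_of[i] for i in estimation_ids])
print(index)                              # -> [2 0]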
Example #6
    def prepare_for_estimate(self, specification_dict=None, specification_storage=None,
                              specification_table=None, agent_set=None, 
                              agents_for_estimation_storage=None,
                              agents_for_estimation_table=None, join_datasets=False,
                              index_to_unplace=None, portion_to_unplace=1.0,
                              compute_lambda=False, grouping_location_set=None,
                              movers_variable=None, movers_index=None,
                              filter=None, location_id_variable=None,
                              data_objects={}):
        """Put 'location_id_variable' always in, if the location id is to be computed on the estimation set,
        i.e. if it is not a primary attribute of the estimation set. Set 'index_to_unplace' to None, if 'compute_lambda' is True.
        In such a case, the annual supply is estimated without unplacing agents. 'grouping_location_set', 'movers_variable' and
        'movers_index' must be given, if 'compute_lambda' is True.
        """
        from opus_core.model import get_specification_for_estimation
        from urbansim.functions import compute_supply_and_add_to_location_set
        specification = get_specification_for_estimation(specification_dict,
                                                          specification_storage,
                                                          specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string, resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace*index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) + " agents.")
            agent_set.modify_attribute(self.choice_set.get_id_name()[0],
                                        resize(array([-1]), end_index_to_unplace.size), end_index_to_unplace)
        if compute_lambda:
            movers = zeros(agent_set.size(), dtype="bool8")
            if movers_index is not None:
                movers[movers_index] = 1
            agent_set.add_primary_attribute(movers, "potential_movers")
            self.estimate_config["weights_for_estimation_string"] = self.estimate_config["weights_for_estimation_string"]+"_from_lambda"
            compute_supply_and_add_to_location_set(self.choice_set, grouping_location_set,
                                                   self.run_config["number_of_units_string"],
                                                   self.run_config["capacity_string"],
                                                   movers_variable,
                                                   self.estimate_config["weights_for_estimation_string"],
                                                   resources=Resources(data_objects))

        # create agents for estimation
        if (agents_for_estimation_storage is not None) and (agents_for_estimation_table is not None):
            estimation_set = Dataset(in_storage = agents_for_estimation_storage,
                                      in_table_name=agents_for_estimation_table,
                                      id_name=agent_set.get_id_name(), dataset_name=agent_set.get_dataset_name())
            if location_id_variable is not None:
                estimation_set.compute_variables(location_id_variable, resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                estimation_set.add_primary_attribute(estimation_set.get_attribute(location_id_variable), VariableName(location_id_variable).get_alias())
            if filter:
                values = estimation_set.compute_variables(filter, resources=Resources(data_objects))
                index = where(values > 0)[0]
                estimation_set.subset_by_index(index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set, require_all_attributes=False,
                                    change_ids_if_not_unique=True)
                index = arange(agent_set.size()-estimation_set.size(),agent_set.size())
            else:
                index = agent_set.get_id_index(estimation_set.get_id_attribute())
        else:
            if agent_set is not None:
                if filter is not None:
                    values = agent_set.compute_variables(filter, resources=Resources(data_objects))
                    index = where(values > 0)[0]
                else:
                    index = arange(agent_set.size())
            else:
                index = None
        return (specification, index)
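The "unplacing" step samples a portion of the currently placed agents and overwrites their location id with -1, so the model treats them as unplaced during estimation. A runnable numpy sketch of the same idea; numpy.random.choice stands in here for opus_core's sample_noreplace, which is an assumed equivalence for illustration:

from numpy import arange, array, resize
from numpy.random import choice

location_id = array([5, 5, 7, 9, 9, 9])      # toy current locations of six agents
index_to_unplace = arange(location_id.size)  # assume every placed agent is a candidate
portion_to_unplace = 0.5

# Sample without replacement, as sample_noreplace does in the source.
unplace_size = int(portion_to_unplace * index_to_unplace.size)
end_index_to_unplace = choice(index_to_unplace, size=unplace_size, replace=False)

# modify_attribute() in the source writes -1 at those positions;
# resize(array([-1]), n) just produces n copies of -1.
location_id[end_index_to_unplace] = resize(array([-1]), end_index_to_unplace.size)
print(location_id)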
Example #7
class ObservedDataOneQuantity:
    """  Class for storing information about one quantity measure. It is to be grouped in 
    an object of class ObservedData.
    """
    # pairs of inverse transformations
    transformation_pairs = {"sqrt": "**2", "log":"exp", "exp": "log", "**2": "sqrt"}

    def __init__(self, variable_name, observed_data, filename=None,  transformation=None, inverse_transformation=None, 
                 filter=None, match=False, dependent_datasets={}, **kwargs):
        """  'variable_name' is a quantity about which we have data available.
        'observed_data' is of type ObservedData, it is the grouping parent. 
        'filename' is the name of file where 
        the data is stored. It can be None, if the observed_data.directory is a cache.
        'transformation' is an operation to be performed on the data (e.g. sqrt, log),
        'inverse_transformation' is the inverse function of 'transformation'. If it not given, it
        is determined automatically.
        'filter' is a variable that will be applied to both, the observed data and the simulated data.
        'match' (logical) determines if the dataset should be matched (by ids) with the simulated dataset. Elements
        that don't match are eliminated from the simulated dataset.
        'dependent_datasets' (if any) should be a dictionary of dataset_name:{'filename': filename, 'match': True|False, **kwargs}. 
        They will be added to the dataset_pool. 
        Remaining arguments are passed into DatasetFactory, thus it can contain information about how 
        to create the corresponding dataset.
        """
        self.variable_name = VariableName(variable_name)
        self.dataset_name = self.variable_name.get_dataset_name()
        dataset_pool = observed_data.get_dataset_pool()
        self.matching_datasets = {}
        
        if dataset_pool is None:
            kwargs.update({'in_storage':observed_data.get_storage(), 'in_table_name': filename})
            try:
                self.dataset = DatasetFactory().search_for_dataset(self.dataset_name, observed_data.get_package_order(), arguments=kwargs)
            except: # take generic dataset
                self.dataset = Dataset(dataset_name=self.dataset_name, **kwargs)
        else:
            self.dataset = dataset_pool.get_dataset(self.dataset_name)
        if match:
            self.add_match(self.dataset)
        for dep_dataset_name, info in dependent_datasets.iteritems():
            if dataset_pool is None:
                dataset_pool = DatasetPool(storage=observed_data.get_storage(), package_order=observed_data.get_package_order())
            info.update({'in_storage':observed_data.get_storage(), 'in_table_name': info.get('filename')})
            del info['filename']
            match = False
            if 'match' in info.keys():
                match = info['match']
                del info['match']
            try:
                dep_dataset = DatasetFactory().search_for_dataset(dep_dataset_name, observed_data.get_package_order(), arguments=info)
            except:
                dep_dataset = Dataset(dataset_name=dep_dataset_name, **info)
            dataset_pool.replace_dataset(dep_dataset_name, dep_dataset)
            if match:
                self.add_match(dep_dataset)
        if self.variable_name.get_alias() not in self.dataset.get_known_attribute_names():
            self.dataset.compute_variables([self.variable_name], dataset_pool=dataset_pool)
        if filter is not None:
            filter_values = self.dataset.compute_variables([filter], dataset_pool=dataset_pool)
            idx = where(filter_values > 0)[0]
            self.add_match(self.dataset, idx)
            self.dataset.subset_by_index(idx)
        self.transformation = transformation
        self.inverse_transformation = inverse_transformation
        if (self.transformation is not None) and (self.inverse_transformation is None):
            self.inverse_transformation = self.transformation_pairs[self.transformation]
                
    def get_values(self):
        return self.dataset.get_attribute(self.variable_name)
        
    def get_transformed_values(self):
        return try_transformation(self.get_values(), self.transformation)
        
    def get_variable_name(self):
        return self.variable_name
    
    def get_dataset(self):
        return self.dataset
    
    def get_dataset_name(self):
        return self.dataset_name
    
    def get_transformation(self):
        return self.transformation
    
    def get_transformation_pair(self):
        return (self.transformation, self.inverse_transformation)
    
    def add_match(self, dataset, index = None):
        dataset_name = dataset.get_dataset_name()
        result = zeros(dataset.size(), dtype='bool8')
        idx = index
        if index is None:
            idx = arange(dataset.size())
        result[idx] = 1
        if dataset_name in self.matching_datasets.keys():
            tmp = zeros(dataset.size(), dtype='bool8')
            tmp[dataset.get_id_index(self.matching_datasets[dataset_name])]=1
            result = result*tmp
        self.matching_datasets[dataset_name] = dataset.get_id_attribute()[where(result)]
        
    def get_matching_datasets(self):
        return self.matching_datasets
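The transformation / inverse_transformation pair lets ObservedDataOneQuantity compare observed and simulated values on a transformed scale and map results back. A self-contained sketch of how such a pair from transformation_pairs could be applied; the apply_transformation helper below is hypothetical and only illustrates what the source's try_transformation presumably does:

from numpy import array, sqrt, log, exp

transformation_pairs = {"sqrt": "**2", "log": "exp", "exp": "log", "**2": "sqrt"}

def apply_transformation(values, transformation):
    # Hypothetical helper: apply the named operation, or return the
    # values untouched when no transformation is requested.
    if transformation is None:
        return values
    if transformation == "sqrt":
        return sqrt(values)
    if transformation == "log":
        return log(values)
    if transformation == "exp":
        return exp(values)
    if transformation == "**2":
        return values ** 2
    raise ValueError("unknown transformation: %s" % transformation)

observed = array([1.0, 4.0, 9.0])
transformed = apply_transformation(observed, "sqrt")    # -> [1. 2. 3.]
restored = apply_transformation(transformed, transformation_pairs["sqrt"])
print(transformed, restored)                            # restored -> [1. 4. 9.]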
Example #8
    def prepare_for_estimate(self,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             agent_set=None,
                             agents_for_estimation_storage=None,
                             agents_for_estimation_table=None,
                             join_datasets=False,
                             index_to_unplace=None,
                             portion_to_unplace=1.0,
                             compute_lambda=False,
                             grouping_location_set=None,
                             movers_variable=None,
                             movers_index=None,
                             filter=None,
                             location_id_variable=None,
                             data_objects={}):
        """Put 'location_id_variable' always in, if the location id is to be computed on the estimation set,
        i.e. if it is not a primary attribute of the estimation set. Set 'index_to_unplace' to None, if 'compute_lambda' is True.
        In such a case, the annual supply is estimated without unplacing agents. 'grouping_location_set', 'movers_variable' and
        'movers_index' must be given, if 'compute_lambda' is True.
        """
        from opus_core.model import get_specification_for_estimation
        from urbansim.functions import compute_supply_and_add_to_location_set
        specification = get_specification_for_estimation(
            specification_dict, specification_storage, specification_table)
        if (agent_set is not None) and (index_to_unplace is not None):
            if self.location_id_string is not None:
                agent_set.compute_variables(self.location_id_string,
                                            resources=Resources(data_objects))
            if portion_to_unplace < 1:
                unplace_size = int(portion_to_unplace * index_to_unplace.size)
                end_index_to_unplace = sample_noreplace(
                    index_to_unplace, unplace_size)
            else:
                end_index_to_unplace = index_to_unplace
            logger.log_status("Unplace " + str(end_index_to_unplace.size) +
                              " agents.")
            agent_set.modify_attribute(
                self.choice_set.get_id_name()[0],
                resize(array([-1]), end_index_to_unplace.size),
                end_index_to_unplace)
        if compute_lambda:
            movers = zeros(agent_set.size(), dtype="bool8")
            if movers_index is not None:
                movers[movers_index] = 1
            agent_set.add_primary_attribute(movers, "potential_movers")
            self.estimate_config["weights_for_estimation_string"] = (
                self.estimate_config["weights_for_estimation_string"] + "_from_lambda")
            compute_supply_and_add_to_location_set(
                self.choice_set,
                grouping_location_set,
                self.run_config["number_of_units_string"],
                self.run_config["capacity_string"],
                movers_variable,
                self.estimate_config["weights_for_estimation_string"],
                resources=Resources(data_objects))

        # create agents for estimation
        if (agents_for_estimation_storage
                is not None) and (agents_for_estimation_table is not None):
            estimation_set = Dataset(in_storage=agents_for_estimation_storage,
                                     in_table_name=agents_for_estimation_table,
                                     id_name=agent_set.get_id_name(),
                                     dataset_name=agent_set.get_dataset_name())
            if location_id_variable is not None:
                estimation_set.compute_variables(
                    location_id_variable, resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                estimation_set.add_primary_attribute(
                    estimation_set.get_attribute(location_id_variable),
                    VariableName(location_id_variable).get_alias())
            if filter:
                values = estimation_set.compute_variables(
                    filter, resources=Resources(data_objects))
                index = where(values > 0)[0]
                estimation_set.subset_by_index(
                    index, flush_attributes_if_not_loaded=False)

            if join_datasets:
                agent_set.join_by_rows(estimation_set,
                                       require_all_attributes=False,
                                       change_ids_if_not_unique=True)
                index = arange(agent_set.size() - estimation_set.size(),
                               agent_set.size())
            else:
                index = agent_set.get_id_index(
                    estimation_set.get_id_attribute())
        else:
            if agent_set is not None:
                if filter is not None:
                    values = agent_set.compute_variables(
                        filter, resources=Resources(data_objects))
                    index = where(values > 0)[0]
                else:
                    index = arange(agent_set.size())
            else:
                index = None
        return (specification, index)
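When compute_lambda is True, the method first marks movers with a boolean "potential_movers" attribute before computing supply. A numpy sketch of that mask construction, with toy sizes that are assumptions:

from numpy import zeros, array

agent_set_size = 8                  # assumed number of agents
movers_index = array([1, 4, 6])     # assumed indices of agents flagged as movers

# Same construction as the source (which uses dtype="bool8", an older numpy alias for bool).
movers = zeros(agent_set_size, dtype=bool)
movers[movers_index] = 1
print(movers.astype(int))           # -> [0 1 0 0 1 0 1 0]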