Example #1
    def write(self, resources=None, out_storage=None, out_table_name=None):
        """
        """  # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id": self.field_submodel_id,
            "field_equation_id": self.field_equation_id,
            "field_coefficient_name": self.field_coefficient_name,
            "field_variable_name": self.field_variable_name,
            "field_fixed_value": self.field_fixed_value,
            "out_table_name": out_table_name
        })
        if out_storage is not None:
            self.out_storage = out_storage
        if not isinstance(self.out_storage, Storage):
            logger.log_warning(
                "out_storage has to be of type Storage. No EquationSpecifications written."
            )
            return

        submodel_ids = self.get_submodels()
        if submodel_ids.size == 0:
            # set submodel_id to -2 when there are no submodels or only one
            submodel_ids = resize(array([-2], dtype="int32"),
                                  len(self.get_coefficient_names()))

        equation_ids = self.get_equations()
        if equation_ids.size == 0:
            equation_ids = resize(array([-2], dtype="int32"),
                                  submodel_ids.size)

        values = {
            local_resources["field_submodel_id"]: submodel_ids,
            local_resources["field_equation_id"]: equation_ids,
            local_resources["field_coefficient_name"]:
            self.get_coefficient_names(),
            local_resources["field_variable_name"]:
            self.get_long_variable_names()
        }
        if self.fixed_values.size > 0:
            values[local_resources["field_fixed_value"]] = self.fixed_values
        for field in self.other_fields.keys():
            values[field] = self.other_fields[field]

        types = {
            local_resources["field_submodel_id"]: 'integer',
            local_resources["field_equation_id"]: 'integer',
            local_resources["field_coefficient_name"]: 'text',
            local_resources["field_variable_name"]: 'text'
        }

        local_resources.merge({
            "values": values,
            'valuetypes': types,
            "drop_table_flag": 1
        })

        self.out_storage.write_table(
            table_name=local_resources['out_table_name'],
            table_data=local_resources['values'])
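
For orientation, a minimal usage sketch of this method follows. It assumes an existing EquationSpecification instance `spec` and the in-memory `dict_storage` obtained from opus_core's StorageFactory; both names are illustrative and not part of the listing above.

# Hedged usage sketch (assumed setup, not part of the example above).
from opus_core.storage_factory import StorageFactory

storage = StorageFactory().get_storage('dict_storage')   # in-memory Storage implementation
spec.write(out_storage=storage, out_table_name='model_specification')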
 def load(self, resources=None, in_storage=None, in_table_name=None):
     """
     """ # TODO: insert docstring
     local_resources = Resources(resources)
     local_resources.merge_with_defaults({
         "field_submodel_id":self.field_submodel_id,
         "field_coefficient_name":self.field_coefficient_name,
         "field_estimate":self.field_estimate,
         "field_standard_error":self.field_standard_error,
         "other_fields":self.other_fields})
     if in_storage is not None:
         self.in_storage = in_storage
     if not isinstance(self.in_storage, Storage):
         logger.log_warning("in_storage has to be of type Storage. No coefficients loaded.")
     else:
         data = self.in_storage.load_table(table_name=in_table_name)
         submodels = data[local_resources["field_submodel_id"]]
         self.names = data[local_resources["field_coefficient_name"]]
         self.values = data[local_resources["field_estimate"]]
         self.standard_errors = data[local_resources["field_standard_error"]]
         for measure in local_resources["other_fields"]:
             if measure in data.keys():
                 self.other_measures[measure] = data[measure]
         if submodels.max() >= 0:
             self.submodels=submodels
         self.check_consistency()
Example #3
 def load(self, resources=None, in_storage=None, in_table_name=None):
     """
     """  # TODO: insert docstring
     local_resources = Resources(resources)
     local_resources.merge_with_defaults({
         "field_submodel_id": self.field_submodel_id,
         "field_coefficient_name": self.field_coefficient_name,
         "field_estimate": self.field_estimate,
         "field_standard_error": self.field_standard_error,
         "other_fields": self.other_fields
     })
     if in_storage is not None:
         self.in_storage = in_storage
     if not isinstance(self.in_storage, Storage):
         logger.log_warning(
             "in_storage has to be of type Storage. No coefficients loaded."
         )
     else:
         data = self.in_storage.load_table(table_name=in_table_name)
         submodels = data[local_resources["field_submodel_id"]]
         self.names = data[local_resources["field_coefficient_name"]]
         self.values = data[local_resources["field_estimate"]]
         self.standard_errors = data[
             local_resources["field_standard_error"]]
         for measure in local_resources["other_fields"]:
             if measure in data.keys():
                 self.other_measures[measure] = data[measure]
         if submodels.max() >= 0:
             self.submodels = submodels
         self.check_consistency()
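
A complementary sketch for the load direction, assuming `coefficients` is a Coefficients instance and `storage` already holds a table written as shown in Example #1; the table name is illustrative.

# Hedged sketch: read coefficients back from a storage table.
coefficients.load(in_storage=storage, in_table_name='model_coefficients')
print coefficients.get_names()            # coefficient names (field_coefficient_name column)
print coefficients.get_values()           # estimates (field_estimate column)
print coefficients.get_standard_errors()  # standard errors (field_standard_error column)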
Example #4
    def write(self, resources=None, out_storage=None, out_table_name=None):
        """
        """  # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id": self.field_submodel_id,
            "field_coefficient_name": self.field_coefficient_name,
            "field_estimate": self.field_estimate,
            "field_standard_error": self.field_standard_error,
            "other_fields": self.other_fields,
            "out_table_name": out_table_name
        })
        if out_storage is not None:
            self.out_storage = out_storage
        if not isinstance(self.out_storage, Storage):
            logger.log_warning(
                "out_storage has to be of type Storage. No coefficients written."
            )
            return

        submodels = self.get_submodels()
        if submodels.size <= 0:
            submodels = resize(array([-2], dtype=int32), self.size())
        values = {
            local_resources["field_submodel_id"]: submodels,
            local_resources["field_coefficient_name"]: self.get_names(),
            local_resources["field_estimate"]: self.get_values(),
            local_resources["field_standard_error"]:
            self.get_standard_errors()
        }
        for measure in self.other_measures.keys():
            values[measure] = self.other_measures[measure]
        types = {
            local_resources["field_submodel_id"]: 'integer',
            local_resources["field_coefficient_name"]: 'text',
            local_resources["field_estimate"]: 'double',
            local_resources["field_standard_error"]: 'double'
        }
        attrtypes = {
            local_resources["field_submodel_id"]: AttributeType.PRIMARY,
            local_resources["field_coefficient_name"]: AttributeType.PRIMARY,
            local_resources["field_estimate"]: AttributeType.PRIMARY,
            local_resources["field_standard_error"]: AttributeType.PRIMARY
        }
        for measure in self.other_measures.keys():
            types[measure] = 'double'
            attrtypes[measure] = AttributeType.PRIMARY
        local_resources.merge({
            "values": values,
            'valuetypes': types,
            "drop_table_flag": 1,
            "attrtype": attrtypes
        })

        self.out_storage.write_table(
            table_name=local_resources['out_table_name'],
            table_data=local_resources['values'])
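
A hedged sketch of calling this write method and inspecting the result through the same Storage API used by the class; `coefficients` and `storage` are assumed to exist, and the table name is illustrative.

# Hedged sketch: persist coefficients and look at the written columns.
# The table holds one row per coefficient, with columns named by the
# field_submodel_id, field_coefficient_name, field_estimate and
# field_standard_error attributes, plus one 'double' column per entry in
# other_measures; the submodel column is filled with -2 when the
# coefficients are not split by submodel.
coefficients.write(out_storage=storage, out_table_name='model_coefficients')
table = storage.load_table(table_name='model_coefficients')
print sorted(table.keys())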
    def _search_for_dataset_helper(self, dataset_name, package_order,
                                   use_hidden_id, **kwargs):
        # this part of the search_for_dataset code is factored into a helper method, rather than passing in
        # use_hidden_id as a keyword parameter with a default value of False, so that we don't pass this
        # keyword parameter along to the get_dataset method
        for package_name in package_order:
            try:
                dataset = self.get_dataset(dataset_name,
                                           package=package_name,
                                           **kwargs)
                if dataset is not None:
                    break
            except ImportError:
                continue
        else:
            from opus_core.datasets.dataset import Dataset
            from opus_core.resources import Resources

            resources = Resources(kwargs.get('arguments', {}))
            if use_hidden_id:
                id_name_default = []
            else:
                id_name_default = "%s_id" % dataset_name
            (table_name, module_name, class_name
             ) = self._table_module_class_names_for_dataset(dataset_name)
            ## set table_name and id_name_default as default values in resources (arguments)
            resources.merge_with_defaults({
                'dataset_name': dataset_name,
                'in_table_name': table_name,
                'out_table_name': table_name,
                'id_name': id_name_default
            })
            try:
                dataset = Dataset(resources=resources)
            except:
                # try to create a dataset using deprecated values
                (table_name, module_name, class_name
                 ) = self._table_module_class_names_for_dataset_deprecated(
                     dataset_name)
                resources = Resources(kwargs.get('arguments', {}))
                resources.merge_with_defaults({
                    'dataset_name': dataset_name,
                    'in_table_name': table_name,
                    'out_table_name': table_name,
                    'id_name': id_name_default
                })
                try:
                    dataset = Dataset(resources=resources)
                except:
                    logger.log_warning(
                        "Could not create a generic Dataset '%s'." %
                        dataset_name)
                    raise
                #TODO: uncomment this warning when we change to singular
                #logger.log_warning("Dataset %s was created using deprecated table name - using plural will not be supported in the future." % dataset_name)
        return dataset
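
The helper leans on Python's for/else: the else block runs only when no package in package_order produced a dataset, i.e. when the loop ends without break. A small self-contained illustration of that control flow (names are hypothetical and unrelated to the Opus classes):

def find_first(loaders, name):
    # Try each loader in order; the else clause of the for loop runs only
    # if the loop finished without hitting 'break'.
    for loader in loaders:
        try:
            result = loader(name)
            if result is not None:
                break
        except ImportError:
            continue
    else:
        result = "fallback:%s" % name   # generic fallback, like the Dataset(resources=...) branch above
    return result

print find_first([lambda n: None, lambda n: n.upper()], "gridcell")   # -> 'GRIDCELL'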
    def _search_for_dataset_helper(self, dataset_name, package_order, use_hidden_id, **kwargs):
        # this part of the search_for_dataset code is factored into a helper method, rather than passing in
        # use_hidden_id as a keyword parameter with a default value of False, so that we don't pass this
        # keyword parameter along to the get_dataset method
        for package_name in package_order:
            try:
                dataset = self.get_dataset(dataset_name, package=package_name, **kwargs)
                if dataset is not None:
                    break
            except ImportError:
                continue
        else:
            from opus_core.datasets.dataset import Dataset
            from opus_core.resources import Resources

            resources = Resources(kwargs.get("arguments", {}))
            if use_hidden_id:
                id_name_default = []
            else:
                id_name_default = "%s_id" % dataset_name
            (table_name, module_name, class_name) = self._table_module_class_names_for_dataset(dataset_name)
            ## set table_name and id_name_default as default values in resources (arguments)
            resources.merge_with_defaults(
                {
                    "dataset_name": dataset_name,
                    "in_table_name": table_name,
                    "out_table_name": table_name,
                    "id_name": id_name_default,
                }
            )
            try:
                dataset = Dataset(resources=resources)
            except:
                # try to create a dataset using deprecated values
                (table_name, module_name, class_name) = self._table_module_class_names_for_dataset_deprecated(
                    dataset_name
                )
                resources = Resources(kwargs.get("arguments", {}))
                resources.merge_with_defaults(
                    {
                        "dataset_name": dataset_name,
                        "in_table_name": table_name,
                        "out_table_name": table_name,
                        "id_name": id_name_default,
                    }
                )
                try:
                    dataset = Dataset(resources=resources)
                except:
                    logger.log_warning("Could not create a generic Dataset '%s'." % dataset_name)
                    raise
                # TODO: uncomment this warning when we change to singular
                # logger.log_warning("Dataset %s was created using deprecated table name - using plural will not be supported in the future." % dataset_name)
        return dataset
Example #7
 def load(self,
          resources=None,
          in_storage=None,
          in_table_name=None,
          variables=[]):
     local_resources = Resources(resources)
     local_resources.merge_with_defaults({
         "field_submodel_id": self.field_submodel_id,
         "field_equation_id": self.field_equation_id,
         "field_coefficient_name": self.field_coefficient_name,
         "field_variable_name": self.field_variable_name,
         "field_fixed_value": self.field_fixed_value
     })
     if in_storage is not None:
         self.in_storage = in_storage
     if not isinstance(self.in_storage, Storage):
         logger.log_warning(
             "in_storage is not of type Storage. No EquationSpecification loaded."
         )
     else:
         data = self.in_storage.load_table(table_name=in_table_name)
         equations = array([-1])
         if local_resources["field_equation_id"] in data:
             equations = data[local_resources["field_equation_id"]]
         vars = data[local_resources["field_variable_name"]]
         self.variables = tuple(map(lambda x: VariableName(x), vars))
         self.coefficients = data[local_resources["field_coefficient_name"]]
         if local_resources["field_submodel_id"] in data:
             submodels = data[local_resources["field_submodel_id"]]
         else:
             submodels = array([-2] * self.coefficients.size, dtype="int32")
         self.submodels = submodels
         if equations.max() >= 0:
             self.equations = equations
         if local_resources["field_fixed_value"] in data:
             self.fixed_values = data[local_resources["field_fixed_value"]]
         for field in data:
             if field not in [
                     local_resources["field_submodel_id"],
                     local_resources["field_equation_id"],
                     local_resources["field_variable_name"],
                     local_resources["field_coefficient_name"],
                     local_resources["field_fixed_value"]
             ]:
                 self.other_fields[field] = data[field]
         self.set_other_dim_field_names()
         if variables:
             self.shrink(variables)
    def run(self,
            specification,
            coefficients,
            dataset,
            index=None,
            chunk_specification=None,
            data_objects=None,
            run_config=None,
            initial_values=None,
            procedure=None,
            debuglevel=0):
        """'specification' is of type EquationSpecification,
            'coefficients' is of type Coefficients,
            'dataset' is of type Dataset,
            'index' are indices of individuals in dataset for which
                        the model runs. If it is None, the whole dataset is considered.
            'chunk_specification' determines the number of chunks in which the simulation is processed.
            'data_objects' is a dictionary where each key is the name of a data object
            ('zone', ...) and its value is an object of class Dataset.
           'run_config' is of type Resources; it gives additional arguments for the run.
           If 'procedure' is given, it overwrites the regression_procedure of the constructor.
           'initial_values' is an array of the initial values of the results. It will be overwritten
           by the results for those elements that are handled by the model (defined by submodels in the specification).
           By default the results are initialized with 0.
            'debuglevel' overwrites the constructor 'debuglevel'.
        """
        self.debug.flag = debuglevel
        if run_config is None:
            run_config = Resources()
        if not isinstance(run_config, Resources) and isinstance(
                run_config, dict):
            run_config = Resources(run_config)
        self.run_config = run_config.merge_with_defaults(self.run_config)
        self.run_config.merge({"debug": self.debug})
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.replace_dataset(dataset.get_dataset_name(), dataset)
        if procedure is not None:
            self.regression = RegressionModelFactory().get_model(
                name=procedure)
        if initial_values is None:
            self.initial_values = zeros((dataset.size(), ), dtype=float32)
        else:
            self.initial_values = zeros((dataset.size(), ),
                                        dtype=initial_values.dtype)
            self.initial_values[index] = initial_values

        if dataset.size() <= 0:  # no data loaded yet
            dataset.get_id_attribute()
        if index is None:
            index = arange(dataset.size())

        result = ChunkModel.run(self,
                                chunk_specification,
                                dataset,
                                index,
                                float32,
                                specification=specification,
                                coefficients=coefficients)
        return result
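
A hedged invocation sketch for this run method; the chunking dict format and all object names are assumptions, and specification/coefficients would normally come from the load methods shown earlier.

# Hedged sketch (assumed names): run the regression over the whole dataset in two chunks.
result = model.run(specification, coefficients, dataset,
                   index=None,                          # None -> the whole dataset
                   chunk_specification={'nchunks': 2},  # assumed chunk-spec format
                   debuglevel=1)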
    def estimate(self, specification, agent_set, agents_index=None, procedure=None, data_objects=None,
                  estimate_config=None, debuglevel=0):
        """ Computes capacity if required and calls the estimate method of ChoiceModel.
        See ChoiceModel.estimate for details on arguments.
        """
        if agents_index is None:
            agents_index=arange(agent_set.size())
        if agents_index.size <= 0:
            logger.log_status("Nothing to be done.")
            return (None, None)

        if estimate_config is None:
            estimate_config = Resources()
        self.estimate_config = estimate_config.merge_with_defaults(self.estimate_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if self.location_id_string is not None:
            agent_set.compute_variables(self.location_id_string, dataset_pool=self.dataset_pool)
        
        capacity_for_estimation = None
        if self.estimate_config.get("compute_capacity_flag", False):
            capacity_string_for_estimation = self.estimate_config.get("capacity_string", None)
            capacity_for_estimation = self.determine_capacity(capacity_string=capacity_string_for_estimation, 
                                                              agent_set=agent_set, 
                                                              agents_index=agents_index)

        self.estimate_config.merge({"capacity":capacity_for_estimation})
        return ChoiceModel.estimate(self,specification, agent_set,
                                    agents_index, procedure, estimate_config=self.estimate_config, 
                                    debuglevel=debuglevel)
    def write(self, resources=None, out_storage=None, out_table_name=None):
        """
        """ # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id":self.field_submodel_id,
            "field_equation_id":self.field_equation_id,
            "field_coefficient_name":self.field_coefficient_name,
            "field_variable_name":self.field_variable_name,
            "field_fixed_value":self.field_fixed_value,
            "out_table_name":out_table_name})
        if out_storage is not None:
            self.out_storage = out_storage
        if not isinstance(self.out_storage, Storage):
            logger.log_warning("out_storage has to be of type Storage. No EquationSpecifications written.")
            return

        submodel_ids = self.get_submodels()
        if submodel_ids.size == 0:
            submodel_ids = resize(array([-2], dtype="int32"), len(self.get_coefficient_names())) # set submodel_id to -2 when there are no submodels or only one

        equation_ids = self.get_equations()
        if equation_ids.size == 0:
            equation_ids = resize(array([-2], dtype="int32"), submodel_ids.size)

        values = {local_resources["field_submodel_id"]: submodel_ids,
               local_resources["field_equation_id"]:  equation_ids,
               local_resources["field_coefficient_name"]:  self.get_coefficient_names(),
               local_resources["field_variable_name"]:  self.get_long_variable_names()}
        if self.fixed_values.size > 0:
            values[local_resources["field_fixed_value"]] = self.fixed_values
        for field in self.other_fields.keys():
            values[field] = self.other_fields[field]

        types = {local_resources["field_submodel_id"]: 'integer',
               local_resources["field_equation_id"]:  'integer',
               local_resources["field_coefficient_name"]:  'text',
               local_resources["field_variable_name"]:  'text'}

        local_resources.merge({"values":values, 'valuetypes': types, "drop_table_flag":1})
        
        self.out_storage.write_table(table_name = local_resources['out_table_name'],
            table_data=local_resources['values']
            )
    def write(self, resources=None, out_storage=None, out_table_name=None):
        """
        """ # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id":self.field_submodel_id,
            "field_coefficient_name":self.field_coefficient_name,
            "field_estimate":self.field_estimate,
            "field_standard_error":self.field_standard_error,
            "other_fields":self.other_fields,
            "out_table_name":out_table_name})
        if out_storage is not None:
            self.out_storage = out_storage
        if not isinstance(self.out_storage, Storage):
            logger.log_warning("out_storage has to be of type Storage. No coefficients written.")
            return

        submodels = self.get_submodels()
        if submodels.size <= 0 :
            submodels = resize(array([-2], dtype=int32), self.size())
        values = {local_resources["field_submodel_id"]: submodels,
               local_resources["field_coefficient_name"]:  self.get_names(),
               local_resources["field_estimate"]:  self.get_values(),
               local_resources["field_standard_error"]:  self.get_standard_errors()}
        for measure in self.other_measures.keys():
            values[measure] = self.other_measures[measure]
        types = {local_resources["field_submodel_id"]: 'integer',
               local_resources["field_coefficient_name"]:  'text',
               local_resources["field_estimate"]:  'double',
               local_resources["field_standard_error"]:  'double'}
        attrtypes = {local_resources["field_submodel_id"]: AttributeType.PRIMARY,
               local_resources["field_coefficient_name"]:  AttributeType.PRIMARY,
               local_resources["field_estimate"]:  AttributeType.PRIMARY,
               local_resources["field_standard_error"]: AttributeType.PRIMARY}
        for measure in self.other_measures.keys():
            types[measure]= 'double'
            attrtypes[measure] = AttributeType.PRIMARY
        local_resources.merge({"values":values, 'valuetypes': types, "drop_table_flag":1,
                               "attrtype":attrtypes})
        
        self.out_storage.write_table(table_name=local_resources['out_table_name'],
            table_data = local_resources['values'])       
Example #13
 def get_resources_for_dataset(self, 
           dataset_name, 
           in_storage,
           out_storage,
           resources={},
           in_table_name_pair=(None,None),
           out_table_name_pair=(None,None),
           attributes_pair=(None,None), 
           id_name_pair=(None,None), 
           nchunks_pair=(None,None), 
           debug_pair=(None,None)
           ):
                         
     """Create an object of class Resources to be used in a Dataset object. 
     The created resources are merged with the resources given as an argument 'resources'. 
     The first element
     of each tuple of the remaining arguments contains the desired value, the second element contains 
     the default value which is used if the first element is None. 
     Entries in resources with the same name as these arguments are overwritten if one of the
     tuple values is not None.
     """
         
     # merge resources with arguments
     local_resources = Resources(resources)
     local_resources.merge_if_not_None({
             "in_storage":in_storage,
             "out_storage":out_storage,
             "nchunks":nchunks_pair[0], "attributes":attributes_pair[0],
             "in_table_name": in_table_name_pair[0], "out_table_name": out_table_name_pair[0],
             "id_name":id_name_pair[0], "debug":debug_pair[0],
             "dataset_name":dataset_name})
         
     # merge resources with default values    
     local_resources.merge_with_defaults({
             "nchunks":nchunks_pair[1], "attributes":attributes_pair[1],
             "in_table_name":in_table_name_pair[1], "out_table_name":out_table_name_pair[1],
             "id_name":id_name_pair[1], "debug":debug_pair[1],
             "dataset_name":dataset_name})
         
     return local_resources
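
A hedged sketch of the (value, default) pair convention this method uses: the first tuple element wins when it is not None, otherwise the second element is merged as a default. All names below are illustrative.

# Hedged sketch (assumed names): build Resources for a hypothetical 'gridcell' dataset.
resources = creator.get_resources_for_dataset(
    'gridcell',
    in_storage=input_storage,
    out_storage=output_storage,
    in_table_name_pair=(None, 'gridcells'),   # no explicit value -> default 'gridcells' is used
    id_name_pair=(None, 'grid_id'),
    nchunks_pair=(None, 1),
    debug_pair=(None, 0))
print resources['in_table_name']   # -> 'gridcells'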
    def run(self, specification, coefficients, agent_set,
            agents_index=None, chunk_specification=None,
            data_objects=None, run_config=None, debuglevel=0):
        """ Run a simulation and return a numpy array of length agents_index, giving agent choices (ids of locations).
            'specification' is of type EquationSpecification,
            'coefficients' is of type Coefficients,
            'agent_set' is of type Dataset,
            'agents_index' are indices of individuals in the agent_set for which
                        the model runs. If it is None, the whole agent_set is considered.
            'chunk_specification' determines number of chunks in which the simulation is processed.
                        Default is to use 300 rows per chunk.
            'data_objects' is a dictionary where each key is the name of a data object
                    ('zone', ...) and its value is an object of class Dataset.
            'run_config' is of type Resources, it gives additional arguments for the run.
            'debuglevel' overwrites the constructor 'debuglevel'.
        """
        if run_config is None:
            run_config = Resources()
        self.run_config = run_config.merge_with_defaults(self.run_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included({agent_set.get_dataset_name():agent_set})
        
        ## what is the use of compute location_id string in run? it gets new values anyway
        #if self.location_id_string is not None:
        #    location_id = agent_set.compute_variables(self.location_id_string, dataset_pool=self.dataset_pool)

        ## done in choice_model
        #location_id_name = self.choice_set.get_id_name()[0]
        #if (location_id_name not in agent_set.get_known_attribute_names()):
        #    agent_set.add_attribute(name=location_id_name, data=resize(array([-1]), agent_set.size()))
                    
        if self.run_config.get("agent_units_string", None): # used when agents take different amount of capacity from the total capacity
            agent_set.compute_variables([self.run_config["agent_units_string"]], dataset_pool=self.dataset_pool)

        self.compute_capacity_flag = self.run_config.get("compute_capacity_flag",  False)
        capacity_string = None
        self.capacity = None
        if self.compute_capacity_flag:
            capacity_string = self.run_config.get("capacity_string", None)
            if capacity_string is None:
                raise KeyError(
                    "Entry 'capacity_string' has to be specified in 'run_config' if 'compute_capacity_flag' is True")
            
        ## if weights is None, use capacity for weights
        if self.run_config.get("weights_for_simulation_string", None) is None and capacity_string is not None:
            self.run_config.merge({"weights_for_simulation_string" : capacity_string})
            
        return ChoiceModel.run(self,specification, coefficients, agent_set,
                agents_index=agents_index, chunk_specification=chunk_specification, run_config=self.run_config,
                debuglevel=debuglevel)
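
A hedged sketch of a run_config for this method, using only the keys the code above actually reads (compute_capacity_flag, capacity_string, agent_units_string, weights_for_simulation_string); the capacity variable name is a made-up example.

# Hedged sketch (assumed names): enable capacity computation for the location choice run.
run_config = Resources({
    'compute_capacity_flag': True,
    'capacity_string': 'my_package.location.vacant_units',   # hypothetical variable name
    # 'weights_for_simulation_string' is filled from capacity_string when it is absent
})
choices = model.run(specification, coefficients, agent_set, run_config=run_config)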
 def load(self, resources=None, in_storage=None, in_table_name=None, variables = []):
     local_resources = Resources(resources)
     local_resources.merge_with_defaults({
         "field_submodel_id":self.field_submodel_id,
         "field_equation_id":self.field_equation_id,
         "field_coefficient_name":self.field_coefficient_name,
         "field_variable_name":self.field_variable_name,
         "field_fixed_value":self.field_fixed_value})
     if in_storage is not None:
         self.in_storage = in_storage
     if not isinstance(self.in_storage, Storage):
         logger.log_warning("in_storage is not of type Storage. No EquationSpecification loaded.")
     else:
         data = self.in_storage.load_table(table_name=in_table_name)
         equations=array([-1])
         if local_resources["field_equation_id"] in data:
             equations = data[local_resources["field_equation_id"]]
         vars=data[local_resources["field_variable_name"]]
         self.variables=tuple(map(lambda x: VariableName(x), vars))
         self.coefficients=data[local_resources["field_coefficient_name"]]
         if local_resources["field_submodel_id"] in data:
             submodels = data[local_resources["field_submodel_id"]]
         else:
             submodels = array([-2]*self.coefficients.size, dtype="int32")
         self.submodels=submodels
         if equations.max() >= 0:
             self.equations=equations
         if local_resources["field_fixed_value"] in data:
             self.fixed_values = data[local_resources["field_fixed_value"]]
         for field in data:
             if field not in [local_resources["field_submodel_id"], local_resources["field_equation_id"],
                              local_resources["field_variable_name"], local_resources["field_coefficient_name"],
                              local_resources["field_fixed_value"]]:
                 self.other_fields[field] = data[field]
         self.set_other_dim_field_names()
         if variables:
             self.shrink(variables)
    def run(self, specification, coefficients, agent_set,
            agents_index=None, agents_filter=None,
            chunk_specification=None, data_objects=None,
            run_config=None, debuglevel=0, maximum_runs=10):

        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if agents_index is None:
            if agents_filter is not None:
                agent_set.compute_variables(agents_filter, dataset_pool=self.dataset_pool)
                agents_index = where(agent_set.get_attribute(VariableName(agents_filter).get_alias()))[0]
            else:
                agents_index = arange(agent_set.size())
        if not isinstance(agents_index, ndarray):
            try:
                agents_index = array(agents_index)
            except:
                raise TypeError("Argument agents_index is of wrong type (numpy array or list allowed).")

        if agents_index.size <= 0:
            logger.log_status("Nothing to be done.")
            return array([], dtype='int32')

        if run_config is None:
            run_config = Resources()
        self.run_config = run_config.merge_with_defaults(self.run_config)
        self.number_of_units_string = self.run_config.get("number_of_units_string", None)
        self.number_of_agents_string = self.run_config.get(
                        "number_of_agents_string",
                        "%s.number_of_agents(%s)" % (self.choice_set.get_dataset_name(), agent_set.get_dataset_name()))
            
        if self.number_of_units_string is None:
            maximum_runs = 1
        unplaced = arange(agents_index.size)
        id_name = self.choice_set.get_id_name()[0]
        for run in range(maximum_runs):
            unplaced_size_before_model = unplaced.size
            choices = LocationChoiceModel.run(self, specification, coefficients, agent_set,
                    agents_index[unplaced], chunk_specification, debuglevel=debuglevel)
            if run == 0:
                all_choices=choices
            else:
                all_choices[unplaced]=choices
            unplaced = self.get_movers_from_overfilled_locations(agent_set, agents_index, config=run_config)
            if (unplaced.size <= 0) or (unplaced_size_before_model == unplaced.size) or (unplaced.size == (unplaced_size_before_model - self.observations_mapping['mapped_index'].size)):
                break
            agent_set.set_values_of_one_attribute(id_name, -1, agents_index[unplaced])
        return all_choices
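
The loop above re-runs the location choice only for agents that are still unplaced and stops when everyone is placed or no further progress is made. A self-contained numpy illustration of that retry pattern (not the actual model):

from numpy import arange, ones, where

def place_with_retries(run_once, n_agents, maximum_runs=10):
    # run_once(unplaced) returns one choice per unplaced agent; negative means "still unplaced"
    choices = -1 * ones(n_agents, dtype='int32')
    unplaced = arange(n_agents)
    for run in range(maximum_runs):
        size_before = unplaced.size
        choices[unplaced] = run_once(unplaced)
        unplaced = where(choices < 0)[0]
        if unplaced.size == 0 or unplaced.size == size_before:
            break
    return choices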
Example #17
 def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
         data_objects=None, run_config=None, initial_values=None, procedure=None, debuglevel=0):
     """'specification' is of type EquationSpecification,
         'coefficients' is of type Coefficients,
         'dataset' is of type Dataset,
         'index' are indices of individuals in dataset for which
                     the model runs. If it is None, the whole dataset is considered.
         'chunk_specification' determines the number of chunks in which the simulation is processed.
         'data_objects' is a dictionary where each key is the name of a data object
         ('zone', ...) and its value is an object of class Dataset.
        'run_config' is of type Resources; it gives additional arguments for the run.
        If 'procedure' is given, it overwrites the regression_procedure of the constructor.
        'initial_values' is an array of the initial values of the results. It will be overwritten
        by the results for those elements that are handled by the model (defined by submodels in the specification).
        By default the results are initialized with 0.
         'debuglevel' overwrites the constructor 'debuglevel'.
     """
     self.debug.flag = debuglevel
      if run_config is None:
          run_config = Resources()
      if not isinstance(run_config, Resources) and isinstance(run_config, dict):
         run_config = Resources(run_config)
     self.run_config = run_config.merge_with_defaults(self.run_config)
     self.run_config.merge({"debug":self.debug})
     if data_objects is not None:
         self.dataset_pool.add_datasets_if_not_included(data_objects)
     self.dataset_name = dataset.get_dataset_name()
     self.dataset_pool.replace_dataset(self.dataset_name, dataset)
     
     if procedure is not None: 
         self.regression = RegressionModelFactory().get_model(name=procedure)
     if initial_values is None:
         self.initial_values = zeros((dataset.size(),), dtype=float32)
     else:
         self.initial_values = zeros((dataset.size(),), dtype=initial_values.dtype)
         self.initial_values[index] = initial_values
         
     if dataset.size()<=0: # no data loaded yet
         dataset.get_id_attribute()
      if index is None:
         index = arange(dataset.size())
         
     result = ChunkModel.run(self, chunk_specification, dataset, index, float32,
                              specification=specification, coefficients=coefficients)
     return result
    def estimate(self,
                 specification,
                 agent_set,
                 agents_index=None,
                 procedure=None,
                 data_objects=None,
                 estimate_config=None,
                 debuglevel=0):
        """ Computes capacity if required and calls the estimate method of ChoiceModel.
        See ChoiceModel.estimate for details on arguments.
        """
        if agents_index is None:
            agents_index = arange(agent_set.size())
        if agents_index.size <= 0:
            logger.log_status("Nothing to be done.")
            return (None, None)

        if estimate_config is None:
            estimate_config = Resources()
        self.estimate_config = estimate_config.merge_with_defaults(
            self.estimate_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if self.location_id_string is not None:
            agent_set.compute_variables(self.location_id_string,
                                        dataset_pool=self.dataset_pool)

        capacity_for_estimation = None
        if self.estimate_config.get("compute_capacity_flag", False):
            capacity_string_for_estimation = self.estimate_config.get(
                "capacity_string", None)
            capacity_for_estimation = self.determine_capacity(
                capacity_string=capacity_string_for_estimation,
                agent_set=agent_set,
                agents_index=agents_index)

        self.estimate_config.merge({"capacity": capacity_for_estimation})
        return ChoiceModel.estimate(self,
                                    specification,
                                    agent_set,
                                    agents_index,
                                    procedure,
                                    estimate_config=self.estimate_config,
                                    debuglevel=debuglevel)
Example #19
    def run(self,
            dataset1,
            dataset2,
            index1=None,
            index2=None,
            sample_size=10,
            weight=None,
            include_chosen_choice=None,
            with_replacement=True,
            resources=None,
            dataset_pool=None):
        """
        
        
        this function samples number of sample_size (scalar value) alternatives from dataset2
        for agent set specified by dataset1.
        If index1 is not None, only samples alterantives for agents with indices in index1;
        if index2 is not None, only samples alternatives from indices in index2.
        sample_size specifies number of alternatives to be sampled for each agent.
        weight, to be used as sampling weight, is either an attribute name of dataset2, or a 1d
        array of the same length as index2 or 2d array of shape (index1.size, index2.size).

        Also refer to document of interaction_dataset"""

        if dataset_pool is None:
            sc = SessionConfiguration()
            try:
                dataset_pool = sc.get_dataset_pool()
            except:
                dataset_pool = DatasetPool(sc.package_order)

        local_resources = Resources(resources)
        local_resources.merge_if_not_None({
            "dataset1": dataset1,
            "dataset2": dataset2,
            "index1": index1,
            "index2": index2,
            "sample_size": sample_size,
            "weight": weight,
            "with_replacement": with_replacement,
            "include_chosen_choice": include_chosen_choice
        })

        local_resources.check_obligatory_keys(
            ['dataset1', 'dataset2', 'sample_size'])
        agent = local_resources["dataset1"]
        choice = local_resources["dataset2"]
        index1 = local_resources.get("index1", None)
        if index1 is None:
            index1 = arange(agent.size())
        index2 = local_resources.get("index2", None)
        if index2 is None:
            index2 = arange(choice.size())

        if index1.size == 0 or index2.size == 0:
            err_msg = "either choice size or agent size is zero, return None"
            logger.log_warning(err_msg)
            return (None, None)

        agent_category_definition = local_resources.get(
            "agent_category_definition", [])
        choice_category_definition = local_resources.get(
            "choice_category_definition", [])
        agent_filter_attribute = local_resources.get("agent_filter_attribute",
                                                     None)
        category_inflating_factor = local_resources.get(
            "category_inflating_factor", 10)

        frequency, unique_agent_category_id, unique_choice_category_id, agent_category_id, choice_category_id = \
                get_category_and_frequency(agent, agent_category_definition,
                                           choice, choice_category_definition,
                                           agent_filter_attribute, category_inflating_factor,
                                           dataset_pool=dataset_pool)

        include_chosen_choice = local_resources.get("include_chosen_choice",
                                                    False)
        chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
        chosen_choice_index = choice.try_get_id_index(
            chosen_choice_id, return_value_if_not_found=-1)
        chosen_choice_index_to_index2 = lookup(chosen_choice_index,
                                               index2,
                                               index_if_not_found=UNPLACED_ID)

        J = local_resources["sample_size"]
        if include_chosen_choice:
            J = J - 1
        local_resources.merge_with_defaults(
            {'with_replacement': with_replacement})
        with_replacement = local_resources.get("with_replacement")

        sampled_index = empty((index1.size, J), dtype=DTYPE)
        sampling_prob = empty((index1.size, J), dtype="float64")

        _digitize, _where, _normalize = digitize, where, normalize
        _ncumsum, _rand, _searchsorted = ncumsum, rand, searchsorted  #speed hack
        for i in range(unique_agent_category_id.size):
            category_id = unique_agent_category_id[i]
            agents_in_this_category = _where(
                agent_category_id[index1] == category_id)[0]
            num_agents = agents_in_this_category.size
            if num_agents == 0: continue
            #import pdb; pdb.set_trace()

            ## divide frequency by the mean frequency to avoid overflow
            weights = (frequency[i, _digitize(choice_category_id[index2],
                                              unique_choice_category_id) - 1]
                       / frequency[i, :].mean())
            prob = _normalize(weights)
            index = _searchsorted(_ncumsum(prob),
                                  _rand(num_agents * J)).reshape(-1, J)

            if not with_replacement:
                raise NotImplementedError("Sample without replacement is not implemented for this sampler yet.")
                #    nz = nonzero(prob)[0].size
                #    if J < nz:
                #        ## number of non zero weight less than alternatives, sample with replacement
                #        logger.log_warning("There are %s non zero weights and are less than the number of alternatives proposed %s. " % (nz, J) +
                #                           "Sample with replacement instead.")
                #        continue
                #    i=0; max_iterations=200
                #    while True:
                #        index = sort(index, axis=1)
                #        where_repeats = nonzero( logical_not(diff(index, axis=1)) )
                #        num_repeats = where_repeats[0].size
                #        if num_repeats == 0: break
                #        index[where_repeats] = _searchsorted(_rand(num_repeats), prob)
                #        i += 1
                #        if i > max_iterations:
                #            logger.log_warning("weight_sampler_by_category is unable to sample %i alternatives without replacement in %i iterations; " % \
                #                               (J, max_iterations) +
                #                               "give up sampling without replacement and results may contain replacement."
                #                              )
                #            break

            sampled_index[agents_in_this_category, :] = index
            sampling_prob[agents_in_this_category, :] = prob[index]

        sampled_index = index2[sampled_index]
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        #chosen_choice = -1 * ones(chosen_choice_index.size, dtype="int32")
        if include_chosen_choice:
            sampled_index = column_stack(
                (chosen_choice_index[:, newaxis], sampled_index))
            is_chosen_choice[chosen_choice_index != UNPLACED_ID, 0] = 1

            sampling_prob_for_chosen_choices = take(
                prob, chosen_choice_index_to_index2[:, newaxis])
            ## if the chosen choice is unplaced, its sampling probability is 0
            sampling_prob_for_chosen_choices[where(
                chosen_choice_index == UNPLACED_ID)[0], ] = 0.0
            sampling_prob = column_stack(
                [sampling_prob_for_chosen_choices, sampling_prob])

        #chosen_choice[where(is_chosen_choice)[0]] = where(is_chosen_choice)[1]

        interaction_dataset = self.create_interaction_dataset(
            dataset1, dataset2, index1, sampled_index)
        interaction_dataset.add_attribute(sampling_prob,
                                          '__sampling_probability')
        interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')

        ## to get the older returns
        #sampled_index = interaction_dataset.get_2d_index()
        #chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32")
        #where_chosen = where(interaction_dataset.get_attribute("chosen_choice"))
        #chosen_choices[where_chosen[0]]=where_chosen[1]
        #return (sampled_index, chosen_choice)

        return interaction_dataset
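
The core sampling trick above is searchsorted over the cumulative probabilities against uniform random draws; a self-contained numpy illustration (toy weights, not taken from the example):

from numpy import array, cumsum, searchsorted
from numpy.random import rand

weights = array([1.0, 3.0, 1.0, 5.0])
prob = weights / weights.sum()      # analogous to the normalize() used above
num_agents, J = 4, 2
# draw num_agents * J uniform numbers and map each into a weighted category
index = searchsorted(cumsum(prob), rand(num_agents * J)).reshape(-1, J)
print index                          # row i holds the J sampled alternative indices for agent i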
    def run(self, dataset1, dataset2, index1=None, index2=None, sample_size=10, weight=None,
            include_chosen_choice=None, with_replacement=True, resources=None, dataset_pool=None):
        """
        
        
        this function samples number of sample_size (scalar value) alternatives from dataset2
        for agent set specified by dataset1.
        If index1 is not None, only samples alterantives for agents with indices in index1;
        if index2 is not None, only samples alternatives from indices in index2.
        sample_size specifies number of alternatives to be sampled for each agent.
        weight, to be used as sampling weight, is either an attribute name of dataset2, or a 1d
        array of the same length as index2 or 2d array of shape (index1.size, index2.size).

        Also refer to document of interaction_dataset"""

        if dataset_pool is None:
            sc = SessionConfiguration()
            try:
                dataset_pool=sc.get_dataset_pool()
            except:
                dataset_pool = DatasetPool(sc.package_order)

        local_resources = Resources(resources)
        local_resources.merge_if_not_None(
                {"dataset1": dataset1, "dataset2": dataset2,
                "index1":index1, "index2": index2,
                "sample_size": sample_size, "weight": weight,
                "with_replacement": with_replacement,
                "include_chosen_choice": include_chosen_choice})

        local_resources.check_obligatory_keys(['dataset1', 'dataset2', 'sample_size'])
        agent = local_resources["dataset1"]
        choice = local_resources["dataset2"]
        index1 = local_resources.get("index1", None)
        if index1 is None:
            index1 = arange(agent.size())
        index2 = local_resources.get("index2", None)
        if index2 is None:
            index2 = arange(choice.size())
            
        if index1.size == 0 or index2.size == 0:
            err_msg = "either choice size or agent size is zero, return None"
            logger.log_warning(err_msg)
            return (None, None)        

        agent_category_definition = local_resources.get("agent_category_definition", [])
        choice_category_definition = local_resources.get("choice_category_definition", [])
        agent_filter_attribute = local_resources.get("agent_filter_attribute", None)
        category_inflating_factor = local_resources.get("category_inflating_factor", 10)

        frequency, unique_agent_category_id, unique_choice_category_id, agent_category_id, choice_category_id = \
                get_category_and_frequency(agent, agent_category_definition,
                                           choice, choice_category_definition,
                                           agent_filter_attribute, category_inflating_factor,
                                           dataset_pool=dataset_pool)
         
        include_chosen_choice = local_resources.get("include_chosen_choice",  False)
        chosen_choice_id = agent.get_attribute(choice.get_id_name()[0])[index1]
        chosen_choice_index = choice.try_get_id_index(chosen_choice_id, return_value_if_not_found=-1)
        chosen_choice_index_to_index2 = lookup(chosen_choice_index, index2, index_if_not_found=UNPLACED_ID)
        
        J = local_resources["sample_size"]
        if include_chosen_choice:
            J = J - 1
        local_resources.merge_with_defaults({'with_replacement': with_replacement})
        with_replacement = local_resources.get("with_replacement")
        
        sampled_index = empty((index1.size, J), dtype="int32")
        sampling_prob = empty((index1.size, J), dtype="float64")
        
        _digitize, _where,  _normalize = digitize, where, normalize
        _ncumsum, _rand, _searchsorted = ncumsum, rand, searchsorted   #speed hack
        for i in range(unique_agent_category_id.size):
            category_id = unique_agent_category_id[i]
            agents_in_this_category = _where(agent_category_id[index1] == category_id)[0]
            num_agents = agents_in_this_category.size
            if num_agents == 0: continue
            #import pdb; pdb.set_trace()
            
            ## divide frequency by the mean frequency to avoid overflow
            weights = frequency[i, _digitize(choice_category_id[index2], unique_choice_category_id)-1]  / frequency[i, :].mean()
            prob = _normalize(weights)
            index = _searchsorted(_ncumsum(prob), _rand(num_agents * J)).reshape(-1, J)

            if not with_replacement:
                raise NotImplementedError("Sample without replacement is not implemented for this sampler yet.")
                #    nz = nonzero(prob)[0].size
                #    if J < nz:
                #        ## number of non zero weight less than alternatives, sample with replacement
                #        logger.log_warning("There are %s non zero weights and are less than the number of alternatives proposed %s. " % (nz, J) +
                #                           "Sample with replacement instead.")
                #        continue
                #    i=0; max_iterations=200
                #    while True:
                #        index = sort(index, axis=1)
                #        where_repeats = nonzero( logical_not(diff(index, axis=1)) )
                #        num_repeats = where_repeats[0].size
                #        if num_repeats == 0: break
                #        index[where_repeats] = _searchsorted(_rand(num_repeats), prob)
                #        i += 1
                #        if i > max_iterations:
                #            logger.log_warning("weight_sampler_by_category is unable to sample %i alternatives without replacement in %i iterations; " % \
                #                               (J, max_iterations) +
                #                               "give up sampling without replacement and results may contain replacement."
                #                              )
                #            break

            sampled_index[agents_in_this_category, :] = index
            sampling_prob[agents_in_this_category, :] = prob[index] 

        sampled_index = index2[sampled_index]
        is_chosen_choice = zeros(sampled_index.shape, dtype="bool")
        #chosen_choice = -1 * ones(chosen_choice_index.size, dtype="int32")
        if include_chosen_choice:
            sampled_index = column_stack((chosen_choice_index[:,newaxis],sampled_index))
            is_chosen_choice[chosen_choice_index!=UNPLACED_ID, 0] = 1
            
            sampling_prob_for_chosen_choices = take(prob, chosen_choice_index_to_index2[:, newaxis])
            ## if the chosen choice is unplaced, its sampling probability is 0
            sampling_prob_for_chosen_choices[where(chosen_choice_index==UNPLACED_ID)[0],] = 0.0
            sampling_prob = column_stack([sampling_prob_for_chosen_choices, sampling_prob])
            
        #chosen_choice[where(is_chosen_choice)[0]] = where(is_chosen_choice)[1]
        
        interaction_dataset = self.create_interaction_dataset(dataset1, dataset2, index1, sampled_index)
        interaction_dataset.add_attribute(sampling_prob, '__sampling_probability')
        interaction_dataset.add_attribute(is_chosen_choice, 'chosen_choice')

        ## to get the older returns
        #sampled_index = interaction_dataset.get_2d_index()
        #chosen_choices = UNPLACED_ID * ones(index1.size, dtype="int32") 
        #where_chosen = where(interaction_dataset.get_attribute("chosen_choice"))
        #chosen_choices[where_chosen[0]]=where_chosen[1]
        #return (sampled_index, chosen_choice)
        
        return interaction_dataset
    def run(self, specification, coefficients, agent_set,
            agents_index=None, agents_filter=None,
            chunk_specification=None, data_objects=None,
            run_config=None, debuglevel=0, maximum_runs=10):

        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)

        bindex = zeros(agent_set.size(), dtype='b')
        if agents_filter is not None:
            bfilter = agent_set.compute_variables(agents_filter, 
                                                  dataset_pool=self.dataset_pool)

            if agents_index is not None:
                bindex[agents_index] = True
                agents_index = where(bindex * bfilter)[0]
            else:
                agents_index = where(bfilter)[0]
        else:
            if agents_index is None:
                agents_index = arange(agent_set.size())

        if not isinstance(agents_index, ndarray):
            try:
                agents_index = array(agents_index)
            except:
                raise TypeError, "Argument agents_index is of wrong type (numpy array or list allowed.)"

        if agents_index.size == 0:
            logger.log_status("Nothing to be done.")
            return array([], dtype='int32')

        if run_config == None:
            run_config = Resources()
        self.run_config = run_config.merge_with_defaults(self.run_config)
        #this is handled by choices module (in UPC sequence)
        self.number_of_units_string = self.run_config.get("number_of_units_string", None)
        #self.number_of_agents_string = self.run_config.get(
        #                "number_of_agents_string",
        #                "%s.number_of_agents(%s)" % (self.choice_set.get_dataset_name(), agent_set.get_dataset_name()))
            
        if self.number_of_units_string is None:
            maximum_runs = 1

        unplaced = ones_like(agents_index).astype('bool')
        #boolean of the same shape as agents_index
        end_choices = -1 * ones_like(agents_index)
        id_name = self.choice_set.get_id_name()[0]
        demand_string = self.run_config.get("demand_string")
        supply_string = self.run_config.get("supply_string")
        for run in range(maximum_runs):
            unplaced_size_before = unplaced.sum()
            choices = LocationChoiceModel.run(self, 
                                              specification=specification, 
                                              coefficients=coefficients, 
                                              agent_set=agent_set,
                                              agents_index=agents_index[unplaced], 
                                              chunk_specification=chunk_specification, 
                                              debuglevel=debuglevel)
            end_choices[unplaced] = choices
            if run > 0:
                ## delete demand_string and supply_string for later iterations so these
                ## variables are not distorted by re-assigning agents from overfilled locations
                if demand_string: del self.run_config["demand_string"]
                if supply_string: del self.run_config["supply_string"]
                
            unplaced = agent_set[id_name][agents_index] <= 0
            ## these two lines are inside the loop because self.observations_mapping is 
            ## not initialized before calling LocationChoiceModel.run
            agents_size_mapped = self.observations_mapping['mapped_index'].size
            agents_size_unmapped = agents_index.size - agents_size_mapped
            
            logger.log_status("Agent Location Choice Model iteration %s/%s: %s unplaced agents" % \
                              (run+1, maximum_runs, unplaced.sum()))
            if unplaced.sum() in (0, unplaced_size_before, agents_size_unmapped):
                logger.log_status("All agents placed or number of unplaced agents doesn't change; exit ALCM.")
                break

            agent_set.set_values_of_one_attribute(id_name, -1, agents_index[unplaced])
            
        if demand_string: self.run_config["demand_string"] = demand_string
        if supply_string: self.run_config["supply_string"] = supply_string
        
        return end_choices
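    ## Hypothetical invocation sketch (dataset and expression names are assumptions only):
    #
    #   choices = model.run(specification, coefficients, household_set,
    #                       agents_filter="household.building_id <= 0",
    #                       maximum_runs=5)
    #
    ## The loop above then re-runs the location choice for agents that remain
    ## unplaced (id <= 0) until nothing changes or maximum_runs is reached.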
    def estimate(self, specification, dataset, outcome_attribute, index = None, procedure=None, data_objects=None,
                        estimate_config=None,  debuglevel=0):
        """'specification' is of type EquationSpecification,
            'dataset' is of type Dataset,
            'outcome_attribute' - string that determines the dependent variable,
            'index' are indices of individuals in dataset for which
                    the model runs. If it is None, the whole dataset is considered.
            'procedure' - name of the estimation procedure. If it is None,
                there should be an entry "estimation" in 'estimate_config' that determines the procedure. The class
                must have a method 'run' that takes as arguments 'data', 'regression_procedure' and 'resources'.
                It returns a dictionary with entries 'estimators', 'standard_errors' and 't_values' (all 1D numpy arrays).
            'data_objects' is a dictionary where each key is the name of an data object
                    ('zone', ...) and its value is an object of class  Dataset.
            'estimate_config' is of type Resources, it gives additional arguments for the estimation procedure.
            'debuglevel' overwrites the class 'debuglevel'.
        """
        #import wingdbstub
        self.debug.flag = debuglevel
        if estimate_config == None:
            estimate_config = Resources()
        if not isinstance(estimate_config,Resources) and isinstance(estimate_config, dict):
            estimate_config = Resources(estimate_config)
        self.estimate_config = estimate_config.merge_with_defaults(self.estimate_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.procedure=procedure
        if self.procedure == None:
            self.procedure = self.estimate_config.get("estimation", None)
        if self.procedure is not None:
            self.procedure = ModelComponentCreator().get_model_component(self.procedure)
        else:
            logger.log_warning("No estimation procedure given, or problems with loading the corresponding module.")

        compute_resources = Resources({"debug":self.debug})
        if dataset.size()<=0: # no data loaded yet
            dataset.get_id_attribute()
        if index == None:
            index = arange(dataset.size())
        if not isinstance(index,ndarray):
            index=array(index)

        estimation_size_agents = self.estimate_config.get("estimation_size_agents", None) # should be a proportion of the agent_set
        if estimation_size_agents == None:
            estimation_size_agents = 1.0
        else:
            estimation_size_agents = max(min(estimation_size_agents,1.0),0.0) # between 0 and 1

        if estimation_size_agents < 1.0:
            self.debug.print_debug("Sampling agents for estimation ...",3)
            estimation_idx = sample_noreplace(arange(index.size),
                                                         int(index.size*estimation_size_agents))
        else:
            estimation_idx = arange(index.size)

        estimation_idx = index[estimation_idx]
        self.debug.print_debug("Number of observations for estimation: " + str(estimation_idx.size),2)
        if estimation_idx.size <= 0:
            self.debug.print_debug("Nothing to be done.",2)
            return (None, None)

        coefficients = create_coefficient_from_specification(specification)
        self.specified_coefficients = SpecifiedCoefficients().create(coefficients, specification, neqs=1)
        submodels = self.specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(submodels, self.submodel_string, dataset, estimation_idx,
                                      dataset_pool=self.dataset_pool, resources = compute_resources,
                                      submodel_size_max=self.estimate_config.get('submodel_size_max', None))
        variables = self.specified_coefficients.get_full_variable_names_without_constants()
        self.debug.print_debug("Compute variables ...",4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables, dataset_pool=self.dataset_pool, resources = compute_resources)

        coef = {}
        estimated_coef={}
        self.outcome = {}
        dataset.compute_variables([outcome_attribute], dataset_pool=self.dataset_pool, resources=compute_resources)
        regression_resources=Resources(estimate_config)
        regression_resources.merge({"debug":self.debug})
        outcome_variable_name = VariableName(outcome_attribute)
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(self.specified_coefficients,submodel)
            self.increment_current_status_piece()
            logger.log_status("Estimate regression for submodel " +str(submodel),
                               tags=["estimate"], verbosity_level=2)
            #logger.log_status("Number of observations: " +str(self.observations_mapping[submodel].size),
                               #tags=["estimate"], verbosity_level=2)
            self.data[submodel] = dataset.create_regression_data_for_estimation(coef[submodel],
                                                            index = estimation_idx[self.observations_mapping[submodel]])
            self.coefficient_names[submodel] = coef[submodel].get_coefficient_names_without_constant()[0,:]
            if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0) and (self.procedure is not None): # observations for this submodel available
                self.outcome[submodel] = dataset.get_attribute_by_index(outcome_variable_name.get_alias(), estimation_idx[self.observations_mapping[submodel]])   
                regression_resources.merge({"outcome":  self.outcome[submodel]})
                regression_resources.merge({"coefficient_names":self.coefficient_names[submodel].tolist(),
                            "constant_position": coef[submodel].get_constants_positions()})
                regression_resources.merge({"submodel": submodel})
                estimated_coef[submodel] = self.procedure.run(self.data[submodel], self.regression,
                                                        resources=regression_resources)
                if "estimators" in estimated_coef[submodel].keys():
                    coef[submodel].set_coefficient_values(estimated_coef[submodel]["estimators"])
                if "standard_errors" in estimated_coef[submodel].keys():
                    coef[submodel].set_standard_errors(estimated_coef[submodel]["standard_errors"])
                if "other_measures" in estimated_coef[submodel].keys():
                    for measure in estimated_coef[submodel]["other_measures"].keys():
                        coef[submodel].set_measure(measure,
                              estimated_coef[submodel]["other_measures"][measure])
                if "other_info" in estimated_coef[submodel].keys():
                    for info in estimated_coef[submodel]["other_info"]:
                        coef[submodel].set_other_info(info,
                              estimated_coef[submodel]["other_info"][info])
        coefficients.fill_coefficients(coef)
        self.specified_coefficients.coefficients = coefficients
        self.save_predicted_values_and_errors(specification, coefficients, dataset, outcome_variable_name, index=index, data_objects=data_objects)
            
        return (coefficients, estimated_coef)
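## Illustrative sketch (an assumption, not part of the original module): the
## 'procedure' passed to estimate() only needs a run(data, regression_procedure,
## resources) method returning a dictionary with 'estimators', 'standard_errors'
## and 't_values' (all 1D numpy arrays). A minimal ordinary-least-squares
## stand-in, assuming 'data' is a 2D observations-by-coefficients array and that
## the outcome is passed in resources["outcome"], might look like:
#
#   from numpy import dot, sqrt, diagonal
#   from numpy.linalg import inv
#
#   class minimal_ols_procedure(object):
#       def run(self, data, regression_procedure=None, resources=None):
#           y = resources["outcome"]
#           xtx_inv = inv(dot(data.T, data))
#           beta = dot(xtx_inv, dot(data.T, y))                 # OLS estimators
#           residuals = y - dot(data, beta)
#           sigma2 = dot(residuals, residuals) / float(data.shape[0] - data.shape[1])
#           se = sqrt(diagonal(sigma2 * xtx_inv))               # standard errors
#           return {"estimators": beta, "standard_errors": se, "t_values": beta / se}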
Beispiel #23
0
class PandasDataset(Dataset):
    """
    This is under construction.
    It is an attempt to have an analogous to an Opus Dataset that would use 
    Pandas DataFrame. The actual data is stored in an attribute called df 
    which is a DataFrame and is indexed by the dataset's unique identifier. 
    The dataset can be created from the same inputs as Opus dataset.
    Alternatively, it can be created from an existing Opus dataset using 
    the constructor PandasClassFactory.
    """
    def __init__(self, create_from_data=True, **kwargs):
        if create_from_data:
            self.create_from_data(**kwargs)

    
    def create_from_data(self, resources=None, id_name=None, in_storage=None, dataset_name=None,
            out_storage=None, in_table_name=None, out_table_name=None):
        self.resources = Resources(resources)
        self.resources.merge_if_not_None({ "id_name":id_name,
                            "dataset_name":dataset_name,
                            "in_storage":in_storage,
                            "out_storage":out_storage,
                            "in_table_name":in_table_name,
                            "out_table_name":out_table_name})
        self.resources.merge_with_defaults({"dataset_name":"dataset"})
        self.dataset_name = self.resources.get("dataset_name", None)
        self.attribute_cache = AttributeCache()
        self._aliases = {}
        self._id_names = self.resources.get("id_name", [])
        if not isinstance(self._id_names, list):
            self._id_names = [self._id_names]
        self.variable_factory = VariableFactory()
        self.debug = self.resources.get("debug",  0)
        self.df = pd.DataFrame(self.resources.get('in_storage').load_table(self.resources.get('in_table_name')))
        self._primary_attribute_names = self.get_attribute_names()
        self.df.set_index(self._id_names, inplace=True)
        self.attribute_boxes = {}
        for attr in self._primary_attribute_names:
            self.attribute_boxes[attr] = AttributeBox(self, [],
                                                variable_name=self.create_and_check_qualified_variable_name(attr),
                                                type=AttributeType.PRIMARY,
                                                is_in_memory=True,
                                                header=None,
                                                version=0)
        self.n = self.df.shape[0]
            
    def __getitem__(self, attr):
        """ dataset[attr]
        """
        return self.get_attribute(attr)

    def __setitem__(self, attr, values):
        """ dataset[attr] = values
        """
        self.df[attr] = values

    def get_attribute(self, name):
        if isinstance(name, VariableName):
            name = name.get_alias()
        else:
            name = VariableName(name).get_alias()
        if name in self.get_id_name():
            return self.get_id_attribute()
        return self.df[name].values
    
    def get_id_attribute(self):
        return self.df.index.values
    
    def get_attribute_by_id(self, name, id):
        return self.df[name][id]
    
    def get_attribute_names(self):
        return self.df.columns
    
    def _do_flush_attribute(self, name):
        """For now don't do anything."""
        pass
        
    def load_dataset(self, resources=None, attributes=None, in_storage=None,
                     in_table_name=None, lowercase=None, **kwargs):

        #set defaults
        attributes_default = '*'
        lower_default = 1 # if 1, use lowercase for attribute names

        # merge arguments with dictionaries and add missing entries
        local_resources = Resources(self.resources)
        if resources is not None:
            local_resources.merge_if_not_None(resources)
        local_resources.merge_if_not_None({"attributes":attributes,
                                           "in_storage":in_storage,
                                           "in_table_name":in_table_name,
                                           "lowercase":lowercase})
        local_resources.merge_with_defaults({"attributes":attributes_default,
                                             "lowercase":lower_default,
                                            })

        # check obligatory entries
        local_resources.check_obligatory_keys(["in_storage", "in_table_name"])

        # prepare for loading
        in_storage = local_resources["in_storage"]

        if not self._is_hidden_id():
            local_resources.merge({"id_name":self._id_names})
            
        table_name = local_resources['in_table_name']
        column_names = local_resources['attributes']
        chunked_attributes = self.chunk_columns(storage=in_storage,
                                                   table_name=table_name, 
                                                   column_names=column_names,
                                                   nchunks=1)
        # flatten list
        column_names = [name for name in chunked_attributes[0]
                                if name in in_storage.get_column_names(table_name)]
        data = in_storage.load_table(table_name = table_name, 
                                             column_names = column_names)
        self.df = pd.DataFrame(data)
        self.df.set_index(self._id_names, inplace=True)
        data_computed = {}
        if table_name+".computed" in in_storage.get_table_names():
            column_names_computed = [name for name in column_names
                                if name in in_storage.get_column_names(table_name+".computed")]
            data_computed = in_storage.load_table(table_name = table_name+".computed", 
                                                 column_names = column_names_computed)
            dfcomp = pd.DataFrame(data_computed)
            dfcomp.set_index(self._id_names, inplace=True)
            self.df = pd.concat([self.df, dfcomp], axis=1) # add computed columns, aligned on the id index
                      
        for attr in data:
            if not ((attr in self._id_names) and self.attribute_boxes.has_key(attr)): #do not store id_name every time
                self.attribute_boxes[attr] = AttributeBox(self, [],
                                                variable_name=self.create_and_check_qualified_variable_name(attr),
                                                type=AttributeType.PRIMARY,
                                                is_in_memory=True,
                                                header=None,
                                                version=0)

        for attr in data_computed:
            if not ((attr in self._id_names) and self.attribute_boxes.has_key(attr)): #do not store id_name every time
                self.attribute_boxes[attr] = AttributeBox(self, [],
                                                variable_name=self.create_and_check_qualified_variable_name(attr),
                                                type=AttributeType.COMPUTED,
                                                is_in_memory=True,
                                                header=None,
                                                version=0)
                                                                        
        self.n = self.df.shape[0]

    def add_attribute(self, data, name, metadata=2):
        """Add values given in argument 'data' to dataset as an attribute 'name' as type 'metadata'. If this
        attribute already exists, its values are overwritten. 
        'metadata' should be of type AttributeType (PRIMARY=1, COMPUTED=2).
        The method increments and returns the version number of the attribute.
        """
        if not (isinstance(data, ndarray) or is_masked_array(data)):
            data=array(data)
        name = self.create_and_check_qualified_variable_name(name)
        short_name = name.get_alias()
        if short_name in self.get_attribute_names():
            self.attribute_boxes[short_name].set_is_in_memory(True)
            self.attribute_boxes[short_name].set_type(metadata)
        else:
            self.attribute_boxes[short_name] = AttributeBox(self, data=[], variable_name=name,
                                                type=metadata)
        if metadata == AttributeType.PRIMARY:
            self._add_to_primary_attribute_names(short_name)
        self.df[short_name] = data
        self.__increment_version(short_name)
        return self.get_version(short_name)
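    ## Hypothetical usage (attribute names are illustrative only): adding a computed
    ## attribute overwrites any existing values and bumps the attribute's version:
    #
    #   version = households.add_attribute(households['income'] * 0.3,
    #                                      'estimated_tax',
    #                                      metadata=AttributeType.COMPUTED)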
    
    def attribute_sum(self, name):
        """Return the sum of values of the attribute 'name'.
        """
        return self.df[name].sum()

    def attribute_average(self, name):
        """Return the value of the given attribute averaged over the dataset.
        """
        return self.df[name].mean()

    def summary(self, index=None):
        if index is not None:
            return self.df[index].describe()
        return self.df.describe()

    def size(self):
        """Return size of the dataset."""
        return self.df.shape[0]
    
    def get_data_element_by_id(self, id, all_attributes=False):
        """Return an object of class DataElement of the given identifier id. See get_data_element."""
        return self.get_data_element(id, all_attributes)
    
    def get_data_element(self, id, **kwargs):
        """Return an object of class DataElement of the given index. 
        """
        element = DataElement()
        for col in self.get_attribute_names():
            setattr(element, col, self.df[col][id])
        return element
    
    def subset_by_ids(self, ids, **kwargs):
        """Shrink the dataset to values given by 'index'. The removed data are then lost.
        """
        self.df = self.df.loc[ids]
        self.n = self.df.shape[0]

    def aggregate_dataset_over_ids(self, dataset, function='sum', attribute_name=None, constant=None):
        """Aggregate attribute (given by 'attribute_name') of the given 'dataset' over
        self by applying the given function. The dataset is expected to have an attribute of the same
        name as the unique identifier of self. If attribute_name is not given, the
        argument 'constant' must be given, which is either a scalar or a numpy array. if it
        is a scalar, for each individual to be counted the constant value is taken into the function;
        if it is a numpy array of the same size as dataset, the value in the same index as
        individual is counted into the function.
        """
        workdf = dataset.df
        if attribute_name == None:
            if constant == None:
                self._raise_error(StandardError,
                                  "Either 'attribute_name' or 'constant' must be given.")
            elif isinstance(constant, ndarray):
                if constant.size <> dataset.size():
                    self._raise_error(StandardError,
                                      "constant's size (%d) must be the same as dataset's size (%d)"
                                      % (constant.size, dataset.size()))
                values = constant
            else:
                values = resize(array([constant]), dataset.size())
            attribute_name = '__constant__'
            workdf[attribute_name] = values 
        else: 
            if is_masked_array(dataset[attribute_name]):
                w = where(ma.getmask(dataset[attribute_name]))
                if len(w)>0:
                    where_masked = w[0]
                    # do not consider those elements in the computation
                    workdf[attribute_name] = ma.filled(workdf[attribute_name], NaN)
        #logger.start_block('Aggregate Pandas')
        grouped = workdf.groupby(self.get_id_name())[attribute_name]
        f = getattr(np, function)
        res = grouped.aggregate(f)
        #logger.end_block()
        return res
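    ## Hypothetical usage (dataset names are assumptions): 'households' must carry a
    ## column named like the unique identifier of 'zones' (e.g. zone_id):
    #
    #   income_per_zone = zones.aggregate_dataset_over_ids(households, function='sum',
    #                                                      attribute_name='income')
    #   households_per_zone = zones.aggregate_dataset_over_ids(households, constant=1)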

    def get_join_data(self, dataset, name, join_attribute=None, return_value_if_not_found=None, **kwargs):
        """Does a join on a attribute of two datasets (self and 'dataset').
        'join_attribute' specifies the join attribute of self. If this is None it is
        assumed to be identical to dataset._id_names which is the join attribute of 'dataset'.
        The method returns values of the attribute 'name' (which is an attribute of 'dataset')
        for the joined ids, i.e. the resulting array should have the same size as self.
        """
        default_return_values_by_type = default_filled_values_by_type = {'S':'',
                                                                         'U':'',
                                                                         'b':False,
                                                                         'i':-1,
                                                                         'u':0,
                                                                         'f':-1.0}
        id_name = dataset.get_id_name()
        jattr = join_attribute
        if jattr == None:
            jattr = id_name
        if not isinstance(jattr, list):
            jattr = [jattr]
        if not isinstance(name, list):
            name = [name]
        #logger.start_block('Disaggregate Pandas')
        result = self.df[jattr].join(dataset.df[name], on=jattr)[name]
        #result = dataset.df[name].loc[self.df[jattr[0]]]
        #logger.end_block()
        for attr in result.columns:
            if result[attr].dtype == object:
                result[attr] = result[attr].astype(dataset.df[attr].dtype)
            if np.isnan(result[attr].values).any():
                k = dataset.df[attr].values.dtype.kind
                if return_value_if_not_found is None and default_return_values_by_type.has_key(k):
                    val = default_return_values_by_type[k]
                else:
                    val = return_value_if_not_found
                result[attr].iloc[where(np.isnan(result[attr].values))] = val                
        return result
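    ## Hypothetical usage (names are assumptions): pull the attribute 'area' of the
    ## parent 'zones' dataset onto 'households' via their shared zone_id column:
    #
    #   area_of_home_zone = households.get_join_data(zones, name='area',
    #                                                join_attribute='zone_id')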
    
    def __set_version(self, name, version):
        self.attribute_boxes[name].set_version(version)

    def __increment_version(self, name):
        if self.get_version(name) == None:
            self.__set_version(name, 0)
        else:
            self.__set_version(name, self.get_version(name)+1)
    def run(self,
            specification,
            coefficients,
            agent_set,
            agents_index=None,
            chunk_specification=None,
            data_objects=None,
            run_config=None,
            debuglevel=0):
        """ Run a simulation and return a numpy array of length agents_index, giving agent choices (ids of locations).
            'specification' is of type EquationSpecification,
            'coefficients' is of type Coefficients,
            'agent_set' is of type Dataset,
            'agents_index' are indices of individuals in the agent_set for which
                        the model runs. If it is None, the whole agent_set is considered.
            'chunk_specification' determines number of chunks in which the simulation is processed.
                        Default is to use 300 rows per chunk.
            'data_objects' is a dictionary where each key is the name of a data object
                    ('zone', ...) and its value is an object of class Dataset.
            'run_config' is of type Resources, it gives additional arguments for the run.
            'debuglevel' overwrites the constructor 'debuglevel'.
        """
        if run_config == None:
            run_config = Resources()
        self.run_config = run_config.merge_with_defaults(self.run_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included(
            {agent_set.get_dataset_name(): agent_set})

        ## what is the use of compute location_id string in run? it gets new values anyway
        #if self.location_id_string is not None:
        #    location_id = agent_set.compute_variables(self.location_id_string, dataset_pool=self.dataset_pool)

        ## done in choice_model
        #location_id_name = self.choice_set.get_id_name()[0]
        #if (location_id_name not in agent_set.get_known_attribute_names()):
        #    agent_set.add_attribute(name=location_id_name, data=resize(array([-1]), agent_set.size()))

        if self.run_config.get(
                "agent_units_string", None
        ):  # used when agents take different amount of capacity from the total capacity
            agent_set.compute_variables(
                [self.run_config["agent_units_string"]],
                dataset_pool=self.dataset_pool)

        self.compute_capacity_flag = self.run_config.get(
            "compute_capacity_flag", False)
        capacity_string = None
        self.capacity = None
        if self.compute_capacity_flag:
            capacity_string = self.run_config.get("capacity_string", None)
            if capacity_string is None:
                raise KeyError, \
                    "Entry 'capacity_string' has to be specified in 'run_config' if 'compute_capacity_flag' is True"

        ## if weights is None, use capacity for weights
        if self.run_config.get("weights_for_simulation_string",
                               None) is None and capacity_string is not None:
            self.run_config.merge(
                {"weights_for_simulation_string": capacity_string})

        return ChoiceModel.run(self,
                               specification,
                               coefficients,
                               agent_set,
                               agents_index=agents_index,
                               chunk_specification=chunk_specification,
                               run_config=self.run_config,
                               debuglevel=debuglevel)
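    ## Hypothetical run_config sketch (expression names are assumptions): when
    ## 'compute_capacity_flag' is set, a 'capacity_string' is required and, unless
    ## 'weights_for_simulation_string' is given, also serves as sampling weights:
    #
    #   run_config = Resources({
    #       "compute_capacity_flag": True,
    #       "capacity_string": "urbansim.gridcell.vacant_residential_units",
    #       "agent_units_string": "urbansim.household.persons",
    #   })
    #   choices = model.run(specification, coefficients, household_set,
    #                       agents_index=movers_index, run_config=run_config)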
    def estimate(self,
                 specification,
                 dataset,
                 outcome_attribute,
                 index=None,
                 procedure=None,
                 data_objects=None,
                 estimate_config=None,
                 debuglevel=0):
        """'specification' is of type EquationSpecification,
            'dataset' is of type Dataset,
            'outcome_attribute' - string that determines the dependent variable,
            'index' are indices of individuals in dataset for which
                    the model runs. If it is None, the whole dataset is considered.
            'procedure' - name of the estimation procedure. If it is None,
                there should be an entry "estimation" in 'estimate_config' that determines the procedure. The class
                must have a method 'run' that takes as arguments 'data', 'regression_procedure' and 'resources'.
                It returns a dictionary with entries 'estimators', 'standard_errors' and 't_values' (all 1D numpy arrays).
            'data_objects' is a dictionary where each key is the name of a data object
                    ('zone', ...) and its value is an object of class Dataset.
            'estimate_config' is of type Resources, it gives additional arguments for the estimation procedure.
            'debuglevel' overwrites the class 'debuglevel'.
        """
        #import wingdbstub
        self.debug.flag = debuglevel
        if estimate_config == None:
            estimate_config = Resources()
        if not isinstance(estimate_config, Resources) and isinstance(
                estimate_config, dict):
            estimate_config = Resources(estimate_config)
        self.estimate_config = estimate_config.merge_with_defaults(
            self.estimate_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.procedure = procedure
        if self.procedure == None:
            self.procedure = self.estimate_config.get("estimation", None)
        if self.procedure is not None:
            self.procedure = ModelComponentCreator().get_model_component(
                self.procedure)
        else:
            logger.log_warning(
                "No estimation procedure given, or problems with loading the corresponding module."
            )

        compute_resources = Resources({"debug": self.debug})
        if dataset.size() <= 0:  # no data loaded yet
            dataset.get_id_attribute()
        if index == None:
            index = arange(dataset.size())
        if not isinstance(index, ndarray):
            index = array(index)

        estimation_size_agents = self.estimate_config.get(
            "estimation_size_agents",
            None)  # should be a proportion of the agent_set
        if estimation_size_agents == None:
            estimation_size_agents = 1.0
        else:
            estimation_size_agents = max(min(estimation_size_agents, 1.0),
                                         0.0)  # between 0 and 1

        if estimation_size_agents < 1.0:
            self.debug.print_debug("Sampling agents for estimation ...", 3)
            estimation_idx = sample_noreplace(
                arange(index.size), int(index.size * estimation_size_agents))
        else:
            estimation_idx = arange(index.size)

        estimation_idx = index[estimation_idx]
        self.debug.print_debug(
            "Number of observations for estimation: " +
            str(estimation_idx.size), 2)
        if estimation_idx.size <= 0:
            self.debug.print_debug("Nothing to be done.", 2)
            return (None, None)

        coefficients = create_coefficient_from_specification(specification)
        specified_coefficients = SpecifiedCoefficients().create(coefficients,
                                                                specification,
                                                                neqs=1)
        submodels = specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(
            submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(
            submodels,
            self.submodel_string,
            dataset,
            estimation_idx,
            dataset_pool=self.dataset_pool,
            resources=compute_resources,
            submodel_size_max=self.estimate_config.get('submodel_size_max',
                                                       None))
        variables = specified_coefficients.get_full_variable_names_without_constants(
        )
        self.debug.print_debug("Compute variables ...", 4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables,
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)

        coef = {}
        estimated_coef = {}
        self.outcome = {}
        dataset.compute_variables([outcome_attribute],
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)
        regression_resources = Resources(estimate_config)
        regression_resources.merge({"debug": self.debug})
        outcome_variable_name = VariableName(outcome_attribute)
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(
                specified_coefficients, submodel)
            self.increment_current_status_piece()
            logger.log_status("Estimate regression for submodel " +
                              str(submodel),
                              tags=["estimate"],
                              verbosity_level=2)
            logger.log_status("Number of observations: " +
                              str(self.observations_mapping[submodel].size),
                              tags=["estimate"],
                              verbosity_level=2)
            self.data[
                submodel] = dataset.create_regression_data_for_estimation(
                    coef[submodel],
                    index=estimation_idx[self.observations_mapping[submodel]])
            self.coefficient_names[submodel] = coef[
                submodel].get_coefficient_names_without_constant()[0, :]
            if (self.data[submodel].shape[0] > 0
                ) and (self.data[submodel].size > 0) and (
                    self.procedure
                    is not None):  # observations for this submodel available
                self.outcome[submodel] = dataset.get_attribute_by_index(
                    outcome_variable_name.get_alias(),
                    estimation_idx[self.observations_mapping[submodel]])
                regression_resources.merge({"outcome": self.outcome[submodel]})
                regression_resources.merge({
                    "coefficient_names":
                    self.coefficient_names[submodel].tolist(),
                    "constant_position":
                    coef[submodel].get_constants_positions()
                })
                estimated_coef[submodel] = self.procedure.run(
                    self.data[submodel],
                    self.regression,
                    resources=regression_resources)
                if "estimators" in estimated_coef[submodel].keys():
                    coef[submodel].set_coefficient_values(
                        estimated_coef[submodel]["estimators"])
                if "standard_errors" in estimated_coef[submodel].keys():
                    coef[submodel].set_standard_errors(
                        estimated_coef[submodel]["standard_errors"])
                if "other_measures" in estimated_coef[submodel].keys():
                    for measure in estimated_coef[submodel][
                            "other_measures"].keys():
                        coef[submodel].set_measure(
                            measure, estimated_coef[submodel]["other_measures"]
                            [measure])
                if "other_info" in estimated_coef[submodel].keys():
                    for info in estimated_coef[submodel]["other_info"]:
                        coef[submodel].set_other_info(
                            info, estimated_coef[submodel]["other_info"][info])
        coefficients.fill_coefficients(coef)

        self.save_predicted_values_and_errors(specification,
                                              coefficients,
                                              dataset,
                                              outcome_variable_name,
                                              index=index,
                                              data_objects=data_objects)

        return (coefficients, estimated_coef)
Beispiel #26
0
    def load_dataset(self, resources=None, attributes=None, in_storage=None,
                     in_table_name=None, lowercase=None, **kwargs):

        #set defaults
        attributes_default = '*'
        lower_default = 1 # if 1, use lowercase for attribute names

        # merge arguments with dictionaries and add missing entries
        local_resources = Resources(self.resources)
        if resources is not None:
            local_resources.merge_if_not_None(resources)
        local_resources.merge_if_not_None({"attributes":attributes,
                                           "in_storage":in_storage,
                                           "in_table_name":in_table_name,
                                           "lowercase":lowercase})
        local_resources.merge_with_defaults({"attributes":attributes_default,
                                             "lowercase":lower_default,
                                            })

        # check obligatory entries
        local_resources.check_obligatory_keys(["in_storage", "in_table_name"])

        # prepare for loading
        in_storage = local_resources["in_storage"]

        if not self._is_hidden_id():
            local_resources.merge({"id_name":self._id_names})
            
        table_name = local_resources['in_table_name']
        column_names = local_resources['attributes']
        chunked_attributes = self.chunk_columns(storage=in_storage,
                                                   table_name=table_name, 
                                                   column_names=column_names,
                                                   nchunks=1)
        # flatten list
        column_names = [name for name in chunked_attributes[0]
                                if name in in_storage.get_column_names(table_name)]
        data = in_storage.load_table(table_name = table_name, 
                                             column_names = column_names)
        self.df = pd.DataFrame(data)
        self.df.set_index(self._id_names, inplace=True)
        data_computed = {}
        if table_name+".computed" in in_storage.get_table_names():
            column_names_computed = [name for name in column_names
                                if name in in_storage.get_column_names(table_name+".computed")]
            data_computed = in_storage.load_table(table_name = table_name+".computed", 
                                                 column_names = column_names_computed)
            dfcomp = pd.DataFrame(data_computed)
            dfcomp.set_index(self._id_names, inplace=True)
            self.df = pd.concat([self.df, dfcomp], axis=1) # add computed columns, aligned on the id index
                      
        for attr in data:
            if not ((attr in self._id_names) and self.attribute_boxes.has_key(attr)): #do not store id_name every time
                self.attribute_boxes[attr] = AttributeBox(self, [],
                                                variable_name=self.create_and_check_qualified_variable_name(attr),
                                                type=AttributeType.PRIMARY,
                                                is_in_memory=True,
                                                header=None,
                                                version=0)

        for attr in data_computed:
            if not ((attr in self._id_names) and self.attribute_boxes.has_key(attr)): #do not store id_name every time
                self.attribute_boxes[attr] = AttributeBox(self, [],
                                                variable_name=self.create_and_check_qualified_variable_name(attr),
                                                type=AttributeType.COMPUTED,
                                                is_in_memory=True,
                                                header=None,
                                                version=0)
                                                                        
        self.n = self.df.shape[0]
    def run(self,
            specification,
            coefficients,
            agent_set,
            agents_index=None,
            agents_filter=None,
            chunk_specification=None,
            data_objects=None,
            run_config=None,
            debuglevel=0,
            maximum_runs=10):

        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if agents_index is None:
            if agents_filter is not None:
                agent_set.compute_variables(agents_filter,
                                            dataset_pool=self.dataset_pool)
                agents_index = where(
                    agent_set.get_attribute(
                        VariableName(agents_filter).get_alias()))[0]
            else:
                agents_index = arange(agent_set.size())
        if not isinstance(agents_index, ndarray):
            try:
                agents_index = array(agents_index)
            except:
                raise TypeError, "Argument agents_index is of wrong type (numpy array or list allowed.)"

        if agents_index.size <= 0:
            logger.log_status("Nothing to be done.")
            return array([], dtype='int32')

        if run_config == None:
            run_config = Resources()
        self.run_config = run_config.merge_with_defaults(self.run_config)
        self.number_of_units_string = self.run_config.get(
            "number_of_units_string", None)
        self.number_of_agents_string = self.run_config.get(
            "number_of_agents_string", "%s.number_of_agents(%s)" %
            (self.choice_set.get_dataset_name(), agent_set.get_dataset_name()))

        if self.number_of_units_string is None:
            maximum_runs = 1
        unplaced = arange(agents_index.size)
        id_name = self.choice_set.get_id_name()[0]
        for run in range(maximum_runs):
            unplaced_size_before_model = unplaced.size
            choices = LocationChoiceModel.run(self,
                                              specification,
                                              coefficients,
                                              agent_set,
                                              agents_index[unplaced],
                                              chunk_specification,
                                              debuglevel=debuglevel)
            if run == 0:
                all_choices = choices
            else:
                all_choices[unplaced] = choices
            unplaced = self.get_movers_from_overfilled_locations(
                agent_set, agents_index, config=run_config)
            if (unplaced.size <=
                    0) or (unplaced_size_before_model == unplaced.size) or (
                        unplaced.size
                        == (unplaced_size_before_model -
                            self.observations_mapping['mapped_index'].size)):
                break
            agent_set.set_values_of_one_attribute(id_name, -1,
                                                  agents_index[unplaced])
        return all_choices