Example 1
    def write(self, resources=None, out_storage=None, out_table_name=None):
        """
        """  # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id": self.field_submodel_id,
            "field_equation_id": self.field_equation_id,
            "field_coefficient_name": self.field_coefficient_name,
            "field_variable_name": self.field_variable_name,
            "field_fixed_value": self.field_fixed_value,
            "out_table_name": out_table_name
        })
        if out_storage is not None:
            self.out_storage = out_storage
        if not isinstance(self.out_storage, Storage):
            logger.log_warning(
                "out_storage has to be of type Storage. No EquationSpecifications written."
            )
            return

        submodel_ids = self.get_submodels()
        if submodel_ids.size == 0:
            # set submodel_id to -2 when there are no submodels (or only one)
            submodel_ids = resize(array([-2], dtype="int32"),
                                  len(self.get_coefficient_names()))

        equation_ids = self.get_equations()
        if equation_ids.size == 0:
            equation_ids = resize(array([-2], dtype="int32"),
                                  submodel_ids.size)

        values = {
            local_resources["field_submodel_id"]: submodel_ids,
            local_resources["field_equation_id"]: equation_ids,
            local_resources["field_coefficient_name"]: self.get_coefficient_names(),
            local_resources["field_variable_name"]: self.get_long_variable_names()
        }
        if self.fixed_values.size > 0:
            values[local_resources["field_fixed_value"]] = self.fixed_values
        for field in self.other_fields.keys():
            values[field] = self.other_fields[field]

        types = {
            local_resources["field_submodel_id"]: 'integer',
            local_resources["field_equation_id"]: 'integer',
            local_resources["field_coefficient_name"]: 'text',
            local_resources["field_variable_name"]: 'text'
        }

        local_resources.merge({
            "values": values,
            'valuetypes': types,
            "drop_table_flag": 1
        })

        self.out_storage.write_table(
            table_name=local_resources['out_table_name'],
            table_data=local_resources['values'])

    def get_resources(self, data_dictionary, dataset):
        """Create resources for computing a variable. """
        resources=Resources()
        for key in data_dictionary.keys():
            if key in self.datasets:
                data = data_dictionary[key]
                if self.id_names[key] not in data_dictionary[key].keys() and not isinstance(self.id_names[key], list):
                    # add a 1-based id array since the dataset has no id column
                    data[self.id_names[key]] = arange(1, len(data_dictionary[key][data_dictionary[key].keys()[0]]) + 1)

                if key == "land_cover":
                    land_cover_storage = StorageFactory().get_storage('dict_storage')
                    land_cover_table_name = 'land_cover'
                    land_cover_storage.write_table(
                            table_name=land_cover_table_name,
                            table_data=data,
                        )

                    lc = LandCoverDataset(
                        in_storage=land_cover_storage, 
                        in_table_name=land_cover_table_name, 
                        )
                        
                    # add relative_x and relative_y
                    lc.get_id_attribute()
                    n = int(ceil(sqrt(lc.size())))
                    
                    if "relative_x" not in data.keys():
                        x = (indices((n,n))+1)[1].ravel()
                        lc.add_attribute(x[0:lc.size()], "relative_x", metadata=1)
                    if "relative_y" not in data.keys():
                        y = (indices((n,n))+1)[0].ravel()
                        lc.add_attribute(y[0:lc.size()], "relative_y", metadata=1)
                        
                    resources.merge({key: lc})
                    
                if key == "gridcell":
                    gridcell_storage = StorageFactory().get_storage('dict_storage')
                    gridcell_table_name = 'gridcell'
                    gridcell_storage.write_table(
                            table_name=gridcell_table_name,
                            table_data=data,
                        )
                    
                    gridcell_dataset = GridcellDataset(
                        in_storage = gridcell_storage,
                        in_table_name = gridcell_table_name,
                        )
                    
                    resources.merge({key: gridcell_dataset})
            else:
                resources.merge({key:data_dictionary[key]})

        if dataset not in self.interactions:
            resources.merge({"dataset": resources[dataset]})
        resources.merge({"check_variables":'*', "debug":4})
        return resources
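
A minimal usage sketch, assuming `helper` is an instance of the test class above with `datasets`, `id_names`, and `interactions` already configured (the table contents below are hypothetical):

# Hypothetical data dictionary keyed by dataset name; values are column arrays.
from numpy import array

data_dictionary = {
    "gridcell": {
        "grid_id": array([1, 2, 3]),
        "distance_to_cbd": array([5.0, 2.5, 7.1]),
    },
    "constant": 42,  # non-dataset entries are merged into resources unchanged
}
resources = helper.get_resources(data_dictionary, "gridcell")
# resources["dataset"] is the GridcellDataset built from the table;
# "check_variables" is '*' and "debug" is 4, as set at the end of get_resources.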
Example 3
    def write(self, resources=None, out_storage=None, out_table_name=None):
        """
        """  # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id": self.field_submodel_id,
            "field_coefficient_name": self.field_coefficient_name,
            "field_estimate": self.field_estimate,
            "field_standard_error": self.field_standard_error,
            "other_fields": self.other_fields,
            "out_table_name": out_table_name
        })
        if out_storage is not None:
            self.out_storage = out_storage
        if not isinstance(self.out_storage, Storage):
            logger.log_warning(
                "out_storage has to be of type Storage. No coefficients written."
            )
            return

        submodels = self.get_submodels()
        if submodels.size <= 0:
            submodels = resize(array([-2], dtype=int32), self.size())
        values = {
            local_resources["field_submodel_id"]: submodels,
            local_resources["field_coefficient_name"]: self.get_names(),
            local_resources["field_estimate"]: self.get_values(),
            local_resources["field_standard_error"]: self.get_standard_errors()
        }
        for measure in self.other_measures.keys():
            values[measure] = self.other_measures[measure]
        types = {
            local_resources["field_submodel_id"]: 'integer',
            local_resources["field_coefficient_name"]: 'text',
            local_resources["field_estimate"]: 'double',
            local_resources["field_standard_error"]: 'double'
        }
        attrtypes = {
            local_resources["field_submodel_id"]: AttributeType.PRIMARY,
            local_resources["field_coefficient_name"]: AttributeType.PRIMARY,
            local_resources["field_estimate"]: AttributeType.PRIMARY,
            local_resources["field_standard_error"]: AttributeType.PRIMARY
        }
        for measure in self.other_measures.keys():
            types[measure] = 'double'
            attrtypes[measure] = AttributeType.PRIMARY
        local_resources.merge({
            "values": values,
            'valuetypes': types,
            "drop_table_flag": 1,
            "attrtype": attrtypes
        })

        self.out_storage.write_table(
            table_name=local_resources['out_table_name'],
            table_data=local_resources['values'])
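
A hedged usage sketch: write the coefficients to an in-memory storage. It assumes `coefficients` is a populated Coefficients instance and the usual opus_core import path for StorageFactory:

from opus_core.storage_factory import StorageFactory

out_storage = StorageFactory().get_storage('dict_storage')
coefficients.write(out_storage=out_storage, out_table_name='estimation_results')
# The table holds one row per coefficient: submodel id, coefficient name,
# estimate, standard error, plus a column for each entry in other_measures.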
Example 4
    def run(self, data=None, coefficients=None, resources=None):
        local_resources = Resources()
        if resources:
            local_resources.merge(resources)
        last_result = self.compute_utilities(data=data, coefficients=coefficients, resources=local_resources)
        this_result = self.compute_probabilities(resources=local_resources)
        if this_result is not None:
            last_result = this_result
        this_result = self.compute_choices(resources=local_resources)
        if this_result is not None:
            last_result = this_result
        return last_result
    def preprocess_projects(self, agent_set, agents_index=None, data_objects=None):
        """Split projects that don't find enough choices to smaller ones (of average size).
        """
        resources = Resources(data_objects)
        resources.merge({"debug": self.debug})

        self.choice_set.compute_variables([self.developable_maximum_unit_full_name,
                                           self.developable_minimum_unit_full_name],
                                          resources=resources)

        max_capacity = self.choice_set.get_attribute(self.developable_maximum_unit_short_name)
        min_capacity = self.choice_set.get_attribute(self.developable_minimum_unit_short_name)

        self.set_choice_set_size()
        nchoices = self.get_choice_set_size()
        project_average_size = agent_set.get_attribute(agent_set.get_attribute_name()).mean()
        add_projects = 0
        remove_projects = 0

        if agents_index is None:
            agents_index = arange(agent_set.size())
        # order agents by size
        ordered_indices = argsort(-1*agent_set.get_attribute_by_index(agent_set.get_attribute_name(), agents_index))
        improvement_values = []
        projects_ids = agent_set.get_id_attribute()[agents_index].tolist()
        project_sizes = agent_set.get_attribute_by_index(agent_set.get_attribute_name(), agents_index)
        for iagent in ordered_indices:
            project_size = project_sizes[iagent]
            # how many projects of this size fit in each developable location
            capacity = logical_and(project_size > min_capacity, (max_capacity / project_size) > 0)
            if where(capacity)[0].size < nchoices: # not enough choices found
                nsplitted = int(project_size / project_average_size)
                add_projects += nsplitted
                remove_projects += 1
                projects_ids.remove(agent_set.get_id_attribute()[agents_index[iagent]])
                improvement_values = improvement_values + \
                    nsplitted * [agent_set.get_attribute_by_index("improvement_value", agents_index[iagent])]
            else:
                break # we can break here, since the projects are sorted by size

        if remove_projects > 0:
            agent_set.remove_elements(agents_index[ordered_indices[0:remove_projects]])
            agents_index = agent_set.get_id_index(projects_ids)

        if add_projects > 0:
            max_id = agent_set.get_attribute(agent_set.get_id_name()[0]).max()
            ids = arange(max_id + 1, max_id + 1 + add_projects)
            agent_set.add_elements(data={
                "project_id": ids,
                self.location_set.get_id_name()[0]: zeros((add_projects,)),
                "improvement_value": array(improvement_values),
                agent_set.get_attribute_name(): project_average_size * ones((add_projects,))},
                require_all_attributes=False)
            agents_index = agent_set.get_id_index(projects_ids + ids.tolist())

    def _compute_vacancy_variables(self, location_set, dev_model_configs, resources):
        compute_resources = Resources(resources)
        compute_resources.merge({"debug": self.debug})
        self.units_variable = {}
        self.variable_for_vacancy = {}
        for project_type in dev_model_configs:
            self.units_variable[project_type] = dev_model_configs[project_type]["units"]
            self.variable_for_vacancy[project_type] = compute_resources.get(
                "%s_vacant_variable" % project_type,
                "urbansim.%s.vacant_%s" % (location_set.get_dataset_name(), self.units_variable[project_type]),
            )
            location_set.compute_variables([self.variable_for_vacancy[project_type]], resources=compute_resources)
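
The compute_resources.get(key, default) call above lets a caller override the default vacancy expression per project type. A hedged sketch (`model`, `location_set`, and `dev_model_configs` are assumed to exist; the variable name is illustrative):

from opus_core.resources import Resources  # assumed import path

resources = Resources({
    "residential_vacant_variable": "urbansim.gridcell.vacant_residential_units",
})
model._compute_vacancy_variables(location_set, dev_model_configs, resources)
# variable_for_vacancy["residential"] now uses the override instead of the
# default "urbansim.<dataset>.vacant_<units>" expression.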
Example 7
    def run(self, data=None, coefficients=None, resources=None):
        local_resources = Resources()
        if resources:
            local_resources.merge(resources)
        last_result = self.compute_utilities(data, coefficients, local_resources)
        this_result = self.compute_probabilities(local_resources)
        if this_result is not None:
            last_result = this_result
        this_result = self.compute_choices(local_resources)
        if this_result is not None:
            last_result = this_result
        return last_result

    def run(self, data=None, coefficients=None, resources=None):
        local_resources = Resources()
        if resources:
            local_resources.merge(resources)
        last_result = self.compute_utilities(data, coefficients, local_resources)
        # self.debug.print_debug("utilities: %s" % last_result, 3) # added 7 jul 09
        this_result = self.compute_probabilities(local_resources)
        # self.debug.print_debug("probabilities: %s" % this_result, 3) # added 7 jul 09
        if this_result is not None:
            last_result = this_result
        this_result = self.compute_choices(local_resources)  # determines choices based on probabilities
        # self.debug.print_debug("choices: %s" % this_result, 3) # added 7 jul 09
        if this_result is not None:
            last_result = this_result
        return last_result
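
All of these run variants share the same pipeline: each stage may return None (e.g. a subclass that only computes utilities), in which case the result of the previous stage is carried forward. A minimal standalone sketch of the pattern:

def run_pipeline(stages):
    """Call each stage in order and keep the most recent non-None result,
    mirroring the run() methods above."""
    last_result = None
    for stage in stages:
        this_result = stage()
        if this_result is not None:
            last_result = this_result
    return last_result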
Example 9
    def _compute_vacancy_and_total_units_variables(self, location_set, project_types, resources=None):
        compute_resources = Resources(resources)
        compute_resources.merge({"debug": self.debug})
        self.variable_for_vacancy = {}
        self.variable_for_total_units = {}
        for ptype in project_types:
            self.variable_for_vacancy[ptype] = compute_resources.get(
                "%s_vacant_variable" % ptype,
                "urbansim_zone.%s.vacant_%s" % (location_set.get_dataset_name(),
                                                self.project_specific_units[ptype]))
            self.variable_for_total_units[ptype] = compute_resources.get(
                "%s_total_units_variable" % ptype,
                "%s.aggregate(urbansim_zone.building.total_%s)" % (location_set.get_dataset_name(),
                                                                   self.project_specific_units[ptype]))
            location_set.compute_variables([self.variable_for_vacancy[ptype],
                                            self.variable_for_total_units[ptype]],
                                           dataset_pool=self.dataset_pool,
                                           resources=compute_resources)

    def _compute_vacancy_variables(self, location_set, dev_model_configs, resources):
        compute_resources = Resources(resources)
        compute_resources.merge({"debug": self.debug})
        self.units_variable = {}
        self.variable_for_vacancy = {}
        for project_type in dev_model_configs:
            self.units_variable[project_type] = dev_model_configs[project_type]['units']
            self.variable_for_vacancy[project_type] = compute_resources.get(
                "%s_vacant_variable" % project_type,
                "urbansim.%s.vacant_%s" % (location_set.get_dataset_name(),
                                           self.units_variable[project_type]))
            location_set.compute_variables(
                [self.variable_for_vacancy[project_type]],
                resources=compute_resources)
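
For a hypothetical project type "residential" on a zone location set, the default total-units expression built above expands to "zone.aggregate(urbansim_zone.building.total_residential_units)", i.e. building-level totals summed up to zones. Both defaults can be overridden through the resources argument; a hedged sketch with illustrative variable names:

from opus_core.resources import Resources  # assumed import path

resources = Resources({
    "residential_vacant_variable": "urbansim_zone.zone.my_vacant_units",
    "residential_total_units_variable": "zone.aggregate(building.my_total_units)",
})
# Passing these resources into _compute_vacancy_and_total_units_variables
# replaces both default expressions for the "residential" project type.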
Example 11
    def __init__(self, config=None, save_estimation_results=False):
        if 'cache_directory' not in config or config['cache_directory'] is None:
            raise KeyError("The cache directory must be specified in the "
                "given configuration, giving the filesystem path to the cache "
                "directory containing the data with which to estimate. Please "
                "check that your configuration contains the 'cache_directory' "
                "entry and that it is not None.")

        self.simulation_state = SimulationState(new_instance=True, start_time=config.get('base_year', 0))
        self.simulation_state.set_cache_directory(config['cache_directory'])

        SessionConfiguration(new_instance=True,
                             package_order=config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        self.config = Resources(config)
        self.save_estimation_results = save_estimation_results
        self.debuglevel = self.config.get("debuglevel", 4)
        self.model_system = ModelSystem()
        self.agents_index_for_prediction = None
        
        models = self.config.get('models',[])

        self.model_name = None
        if "model_name" in config.keys():
            self.model_name = config["model_name"]
        else:
            for model in models:
                if isinstance(model, dict):
                    model_name = model.keys()[0]
                    if (model[model_name] == "estimate") or \
                            (isinstance(model[model_name], list) and ("estimate" in model[model_name])):
                        self.model_name = model_name
                        break
        estimate_config_changes = self.config.get('config_changes_for_estimation', {}).get('estimate_config', {})
        if len(estimate_config_changes) > 0:
            change = Resources({'models_configuration': {self.model_name: {'controller': {'init': {'arguments': {}}}}}})
            estimate_config_str = self.config['models_configuration'].get(self.model_name, {}).get('controller', {}).get('init', {}).get('arguments', {}).get('estimate_config', '{}')
            estimate_config = Resources({})
            try:
                estimate_config = eval(estimate_config_str)
            except Exception:
                # keep the empty Resources created above if the string cannot be evaluated
                pass
            estimate_config.merge(estimate_config_changes)
            self.config.merge(change)
            self.config['models_configuration'][self.model_name]['controller']['init']['arguments']['estimate_config'] = 'Resources(%s)' % estimate_config
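
The block above keeps estimate_config inside the model controller as source text: it is re-created with eval and serialized back with 'Resources(%s)'. A hedged sketch of that round-trip (assumes the usual opus_core Resources import; the config keys are illustrative):

from opus_core.resources import Resources

config_str = "Resources({'estimation_size_agents': 0.5})"  # stored as text
estimate_config = eval(config_str)                  # back to a live Resources
estimate_config.merge({'submodel_size_max': 1000})  # apply estimation changes
config_str = 'Resources(%s)' % estimate_config      # serialized again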
Example 12
    def __init__(self, config=None, save_estimation_results=False):
        if 'cache_directory' not in config or config['cache_directory'] is None:
            raise KeyError("The cache directory must be specified in the "
                "given configuration, giving the filesystem path to the cache "
                "directory containing the data with which to estimate. Please "
                "check that your configuration contains the 'cache_directory' "
                "entry and that it is not None.")

        self.simulation_state = SimulationState(new_instance=True)
        self.simulation_state.set_cache_directory(config['cache_directory'])

        SessionConfiguration(new_instance=True,
                             package_order=config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        self.config = Resources(config)
        self.save_estimation_results = save_estimation_results
        self.debuglevel = self.config.get("debuglevel", 4)
        self.model_system = ModelSystem()
        self.agents_index_for_prediction = None
        
        models = self.config.get('models',[])

        self.model_name = None
        if "model_name" in config.keys():
            self.model_name = config["model_name"]
        else:
            for model in models:
                if isinstance(model, dict):
                    model_name = model.keys()[0]
                    if (model[model_name] == "estimate") or \
                            (isinstance(model[model_name], list) and ("estimate" in model[model_name])):
                        self.model_name = model_name
                        break
        estimate_config_changes = self.config.get('config_changes_for_estimation', {}).get('estimate_config', {})
        if len(estimate_config_changes) > 0:
            change = Resources({'models_configuration': {self.model_name: {'controller': {'init': {'arguments': {}}}}}})
            estimate_config_str = self.config['models_configuration'].get(self.model_name, {}).get('controller', {}).get('init', {}).get('arguments', {}).get('estimate_config', '{}')
            estimate_config = Resources({})
            try:
                estimate_config = eval(estimate_config_str)
            except Exception:
                # keep the empty Resources created above if the string cannot be evaluated
                pass
            estimate_config.merge(estimate_config_changes)
            self.config.merge(change)
            self.config['models_configuration'][self.model_name]['controller']['init']['arguments']['estimate_config'] = 'Resources(%s)' % estimate_config
Example 13
    def run(self, data=None, coefficients=None, resources=None):
        local_resources = Resources()
        if resources:
            local_resources.merge(resources)
        last_result = self.compute_utilities(data, coefficients, local_resources)
        # self.debug.print_debug("utilities: %s" % last_result, 3) # added 7 jul 09
        this_result = self.compute_probabilities(local_resources)
        # self.debug.print_debug("probabilities: %s" % this_result, 3) # added 7 jul 09
        if this_result is not None:
            last_result = this_result
        this_result = self.compute_choices(local_resources)  # determines choices based on probabilities
        # self.debug.print_debug("choices: %s" % this_result, 3) # added 7 jul 09
        if this_result is not None:
            last_result = this_result
        return last_result
    def write(self, resources=None, out_storage=None, out_table_name=None):
        """
        """ # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id": self.field_submodel_id,
            "field_equation_id": self.field_equation_id,
            "field_coefficient_name": self.field_coefficient_name,
            "field_variable_name": self.field_variable_name,
            "field_fixed_value": self.field_fixed_value,
            "out_table_name": out_table_name})
        if out_storage is not None:
            self.out_storage = out_storage
        if not isinstance(self.out_storage, Storage):
            logger.log_warning("out_storage has to be of type Storage. No EquationSpecifications written.")
            return

        submodel_ids = self.get_submodels()
        if submodel_ids.size == 0:
            # set submodel_id to -2 when there are no submodels (or only one)
            submodel_ids = resize(array([-2], dtype="int32"), len(self.get_coefficient_names()))

        equation_ids = self.get_equations()
        if equation_ids.size == 0:
            equation_ids = resize(array([-2], dtype="int32"), submodel_ids.size)

        values = {local_resources["field_submodel_id"]: submodel_ids,
                  local_resources["field_equation_id"]: equation_ids,
                  local_resources["field_coefficient_name"]: self.get_coefficient_names(),
                  local_resources["field_variable_name"]: self.get_long_variable_names()}
        if self.fixed_values.size > 0:
            values[local_resources["field_fixed_value"]] = self.fixed_values
        for field in self.other_fields.keys():
            values[field] = self.other_fields[field]

        types = {local_resources["field_submodel_id"]: 'integer',
                 local_resources["field_equation_id"]: 'integer',
                 local_resources["field_coefficient_name"]: 'text',
                 local_resources["field_variable_name"]: 'text'}

        local_resources.merge({"values":values, 'valuetypes': types, "drop_table_flag":1})
        
        self.out_storage.write_table(table_name=local_resources['out_table_name'],
                                     table_data=local_resources['values'])
Example 15
    def write(self, resources=None, out_storage=None, out_table_name=None):
        """
        """ # TODO: insert docstring
        local_resources = Resources(resources)
        local_resources.merge_with_defaults({
            "field_submodel_id": self.field_submodel_id,
            "field_coefficient_name": self.field_coefficient_name,
            "field_estimate": self.field_estimate,
            "field_standard_error": self.field_standard_error,
            "other_fields": self.other_fields,
            "out_table_name": out_table_name})
        if out_storage is not None:
            self.out_storage = out_storage
        if not isinstance(self.out_storage, Storage):
            logger.log_warning("out_storage has to be of type Storage. No coefficients written.")
            return

        submodels = self.get_submodels()
        if submodels.size <= 0:
            submodels = resize(array([-2], dtype=int32), self.size())
        values = {local_resources["field_submodel_id"]: submodels,
                  local_resources["field_coefficient_name"]: self.get_names(),
                  local_resources["field_estimate"]: self.get_values(),
                  local_resources["field_standard_error"]: self.get_standard_errors()}
        for measure in self.other_measures.keys():
            values[measure] = self.other_measures[measure]
        types = {local_resources["field_submodel_id"]: 'integer',
                 local_resources["field_coefficient_name"]: 'text',
                 local_resources["field_estimate"]: 'double',
                 local_resources["field_standard_error"]: 'double'}
        attrtypes = {local_resources["field_submodel_id"]: AttributeType.PRIMARY,
                     local_resources["field_coefficient_name"]: AttributeType.PRIMARY,
                     local_resources["field_estimate"]: AttributeType.PRIMARY,
                     local_resources["field_standard_error"]: AttributeType.PRIMARY}
        for measure in self.other_measures.keys():
            types[measure] = 'double'
            attrtypes[measure] = AttributeType.PRIMARY
        local_resources.merge({"values": values, 'valuetypes': types, "drop_table_flag": 1,
                               "attrtype": attrtypes})

        self.out_storage.write_table(table_name=local_resources['out_table_name'],
                                     table_data=local_resources['values'])

    def _compute_vacancy_and_total_units_variables(self, location_set, project_types, resources=None):
        compute_resources = Resources(resources)
        compute_resources.merge({"debug": self.debug})
        self.variable_for_vacancy = {}
        self.variable_for_total_units = {}
        for ptype in project_types:
            self.variable_for_vacancy[ptype] = compute_resources.get(
                "%s_vacant_variable" % ptype,
                "urbansim_zone.%s.vacant_%s" % (location_set.get_dataset_name(),
                                                self.project_specific_units[ptype]))
            self.variable_for_total_units[ptype] = compute_resources.get(
                "%s_total_units_variable" % ptype,
                "%s.aggregate(urbansim_zone.building.total_%s)" % (location_set.get_dataset_name(),
                                                                   self.project_specific_units[ptype]))
            location_set.compute_variables([self.variable_for_vacancy[ptype],
                                            self.variable_for_total_units[ptype]],
                                           dataset_pool=self.dataset_pool,
                                           resources=compute_resources)
Example 17
class RegressionModel(ChunkModel):

    model_name = "Regression Model"
    model_short_name = "RM"

    def __init__(self,
                 regression_procedure="opus_core.linear_regression",
                 submodel_string=None,
                 run_config=None,
                 estimate_config=None,
                 debuglevel=0,
                 dataset_pool=None):

        self.debug = DebugPrinter(debuglevel)

        self.dataset_pool = self.create_dataset_pool(dataset_pool)

        self.regression = RegressionModelFactory().get_model(
            name=regression_procedure)
        if self.regression is None:
            raise StandardError, "No regression procedure given."

        self.submodel_string = submodel_string

        self.run_config = run_config
        if self.run_config is None:
            self.run_config = Resources()
        if not isinstance(self.run_config, Resources) and isinstance(self.run_config, dict):
            self.run_config = Resources(self.run_config)

        self.estimate_config = estimate_config
        if self.estimate_config is None:
            self.estimate_config = Resources()
        if not isinstance(self.estimate_config, Resources) and isinstance(self.estimate_config, dict):
            self.estimate_config = Resources(self.estimate_config)

        self.data = {}
        self.coefficient_names = {}
        ChunkModel.__init__(self)
        self.get_status_for_gui().initialize_pieces(3,
                                                    pieces_description=array([
                                                        'initialization',
                                                        'computing variables',
                                                        'submodel: 1'
                                                    ]))

    def run(self,
            specification,
            coefficients,
            dataset,
            index=None,
            chunk_specification=None,
            data_objects=None,
            run_config=None,
            initial_values=None,
            procedure=None,
            debuglevel=0):
        """'specification' is of type EquationSpecification,
            'coefficients' is of type Coefficients,
            'dataset' is of type Dataset,
            'index' are indices of individuals in dataset for which
                        the model runs. If it is None, the whole dataset is considered.
            'chunk_specification' determines  number of chunks in which the simulation is processed.
            'data_objects' is a dictionary where each key is the name of an data object
            ('zone', ...) and its value is an object of class  Dataset.
           'run_config' is of type Resources, it gives additional arguments for the run.
           If 'procedure' is given, it overwrites the regression_procedure of the constructor.
           'initial_values' is an array of the initial values of the results. It will be overwritten
           by the results for those elements that are handled by the model (defined by submodels in the specification).
           By default the results are initialized with 0.
            'debuglevel' overwrites the constructor 'debuglevel'.
        """
        self.debug.flag = debuglevel
        if run_config is None:
            run_config = Resources()
        if not isinstance(run_config, Resources) and isinstance(run_config, dict):
            run_config = Resources(run_config)
        self.run_config = run_config.merge_with_defaults(self.run_config)
        self.run_config.merge({"debug": self.debug})
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if procedure is not None:
            self.regression = RegressionModelFactory().get_model(
                name=procedure)
        if initial_values is None:
            self.initial_values = zeros((dataset.size(), ), dtype=float32)
        else:
            self.initial_values = zeros((dataset.size(), ),
                                        dtype=initial_values.dtype)
            self.initial_values[index] = initial_values

        if dataset.size() <= 0:  # no data loaded yet
            dataset.get_id_attribute()
        if index is None:
            index = arange(dataset.size())

        result = ChunkModel.run(self,
                                chunk_specification,
                                dataset,
                                index,
                                float32,
                                specification=specification,
                                coefficients=coefficients)
        return result

    def run_chunk(self, index, dataset, specification, coefficients):
        self.specified_coefficients = SpecifiedCoefficients().create(
            coefficients, specification, neqs=1)
        compute_resources = Resources({"debug": self.debug})
        submodels = self.specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(
            submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(submodels,
                                     self.submodel_string,
                                     dataset,
                                     index,
                                     dataset_pool=self.dataset_pool,
                                     resources=compute_resources)
        variables = self.specified_coefficients.get_full_variable_names_without_constants()
        self.debug.print_debug("Compute variables ...", 4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables,
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)
        data = {}
        coef = {}
        outcome = self.initial_values[index].copy()
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(
                self.specified_coefficients, submodel)
            self.coefficient_names[submodel] = coef[submodel].get_coefficient_names_without_constant()[0, :]
            self.debug.print_debug(
                "Compute regression for submodel " + str(submodel), 4)
            self.increment_current_status_piece()
            self.data[submodel] = dataset.create_regression_data(
                coef[submodel],
                index=index[self.observations_mapping[submodel]])
            nan_index = where(isnan(self.data[submodel]))[1]
            inf_index = where(isinf(self.data[submodel]))[1]
            if nan_index.size > 0:
                nan_var_index = unique(nan_index)
                raise ValueError, ("NaN (not a number) is returned from variable %s; "
                    "check the model specification table and/or attribute values used "
                    "in the computation for the variable." % coef[submodel].get_variable_names()[nan_var_index])
            if inf_index.size > 0:
                inf_var_index = unique(inf_index)
                raise ValueError, ("Inf is returned from variable %s; "
                    "check the model specification table and/or attribute values used "
                    "in the computation for the variable." % coef[submodel].get_variable_names()[inf_var_index])

            if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0):  # observations for this submodel available
                outcome[self.observations_mapping[submodel]] = \
                    self.regression.run(self.data[submodel], coef[submodel].get_coefficient_values()[0,:],
                        resources=self.run_config).astype(outcome.dtype)
        return outcome

    def correct_infinite_values(self,
                                dataset,
                                outcome_attribute_name,
                                maxvalue=1e+38,
                                clip_all_larger_values=False):
        """Check if the model resulted in infinite values. If yes,
        print warning and clip the values to maxvalue. 
        If clip_all_larger_values is True, all values larger than maxvalue are clip to maxvalue.
        """
        infidx = where(dataset.get_attribute(outcome_attribute_name) == inf)[0]

        if infidx.size > 0:
            logger.log_warning("Infinite values in %s. Clipped to %s." %
                               (outcome_attribute_name, maxvalue))
            dataset.set_values_of_one_attribute(outcome_attribute_name,
                                                maxvalue, infidx)
        if clip_all_larger_values:
            idx = where(
                dataset.get_attribute(outcome_attribute_name) > maxvalue)[0]
            if idx.size > 0:
                logger.log_warning(
                    "Values in %s larger than %s. Clipped to %s." %
                    (outcome_attribute_name, maxvalue, maxvalue))
                dataset.set_values_of_one_attribute(outcome_attribute_name,
                                                    maxvalue, idx)

    def estimate(self,
                 specification,
                 dataset,
                 outcome_attribute,
                 index=None,
                 procedure=None,
                 data_objects=None,
                 estimate_config=None,
                 debuglevel=0):
        """'specification' is of type EquationSpecification,
            'dataset' is of type Dataset,
            'outcome_attribute' - string that determines the dependent variable,
            'index' are indices of individuals in dataset for which
                    the model runs. If it is None, the whole dataset is considered.
            'procedure' - name of the estimation procedure. If it is None,
                there should be an entry "estimation" in 'estimate_config' that determines the procedure. The class
                must have a method 'run' that takes as arguments 'data', 'regression_procedure' and 'resources'.
                It returns a dictionary with entries 'estimators', 'standard_errors' and 't_values' (all 1D numpy arrays).
            'data_objects' is a dictionary where each key is the name of an data object
                    ('zone', ...) and its value is an object of class  Dataset.
            'estimate_config' is of type Resources, it gives additional arguments for the estimation procedure.
            'debuglevel' overwrites the class 'debuglevel'.
        """
        #import wingdbstub
        self.debug.flag = debuglevel
        if estimate_config is None:
            estimate_config = Resources()
        if not isinstance(estimate_config, Resources) and isinstance(estimate_config, dict):
            estimate_config = Resources(estimate_config)
        self.estimate_config = estimate_config.merge_with_defaults(
            self.estimate_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.procedure = procedure
        if self.procedure is None:
            self.procedure = self.estimate_config.get("estimation", None)
        if self.procedure is not None:
            self.procedure = ModelComponentCreator().get_model_component(
                self.procedure)
        else:
            logger.log_warning(
                "No estimation procedure given, or problems with loading the corresponding module."
            )

        compute_resources = Resources({"debug": self.debug})
        if dataset.size() <= 0:  # no data loaded yet
            dataset.get_id_attribute()
        if index is None:
            index = arange(dataset.size())
        if not isinstance(index, ndarray):
            index = array(index)

        estimation_size_agents = self.estimate_config.get(
            "estimation_size_agents", None)  # should be a proportion of the agent_set
        if estimation_size_agents is None:
            estimation_size_agents = 1.0
        else:
            estimation_size_agents = max(min(estimation_size_agents, 1.0), 0.0)  # clamp between 0 and 1

        if estimation_size_agents < 1.0:
            self.debug.print_debug("Sampling agents for estimation ...", 3)
            estimation_idx = sample_noreplace(
                arange(index.size), int(index.size * estimation_size_agents))
        else:
            estimation_idx = arange(index.size)

        estimation_idx = index[estimation_idx]
        self.debug.print_debug(
            "Number of observations for estimation: " +
            str(estimation_idx.size), 2)
        if estimation_idx.size <= 0:
            self.debug.print_debug("Nothing to be done.", 2)
            return (None, None)

        coefficients = create_coefficient_from_specification(specification)
        specified_coefficients = SpecifiedCoefficients().create(coefficients,
                                                                specification,
                                                                neqs=1)
        submodels = specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(
            submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(
            submodels,
            self.submodel_string,
            dataset,
            estimation_idx,
            dataset_pool=self.dataset_pool,
            resources=compute_resources,
            submodel_size_max=self.estimate_config.get('submodel_size_max',
                                                       None))
        variables = specified_coefficients.get_full_variable_names_without_constants()
        self.debug.print_debug("Compute variables ...", 4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables,
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)

        coef = {}
        estimated_coef = {}
        self.outcome = {}
        dataset.compute_variables([outcome_attribute],
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)
        regression_resources = Resources(estimate_config)
        regression_resources.merge({"debug": self.debug})
        outcome_variable_name = VariableName(outcome_attribute)
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(
                specified_coefficients, submodel)
            self.increment_current_status_piece()
            logger.log_status("Estimate regression for submodel " +
                              str(submodel),
                              tags=["estimate"],
                              verbosity_level=2)
            logger.log_status("Number of observations: " +
                              str(self.observations_mapping[submodel].size),
                              tags=["estimate"],
                              verbosity_level=2)
            self.data[submodel] = dataset.create_regression_data_for_estimation(
                coef[submodel],
                index=estimation_idx[self.observations_mapping[submodel]])
            self.coefficient_names[submodel] = coef[submodel].get_coefficient_names_without_constant()[0, :]
            if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0) \
                    and (self.procedure is not None):  # observations for this submodel available
                self.outcome[submodel] = dataset.get_attribute_by_index(
                    outcome_variable_name.get_alias(),
                    estimation_idx[self.observations_mapping[submodel]])
                regression_resources.merge({"outcome": self.outcome[submodel]})
                regression_resources.merge({
                    "coefficient_names":
                    self.coefficient_names[submodel].tolist(),
                    "constant_position":
                    coef[submodel].get_constants_positions()
                })
                estimated_coef[submodel] = self.procedure.run(
                    self.data[submodel],
                    self.regression,
                    resources=regression_resources)
                if "estimators" in estimated_coef[submodel].keys():
                    coef[submodel].set_coefficient_values(
                        estimated_coef[submodel]["estimators"])
                if "standard_errors" in estimated_coef[submodel].keys():
                    coef[submodel].set_standard_errors(
                        estimated_coef[submodel]["standard_errors"])
                if "other_measures" in estimated_coef[submodel].keys():
                    for measure in estimated_coef[submodel][
                            "other_measures"].keys():
                        coef[submodel].set_measure(
                            measure, estimated_coef[submodel]["other_measures"]
                            [measure])
                if "other_info" in estimated_coef[submodel].keys():
                    for info in estimated_coef[submodel]["other_info"]:
                        coef[submodel].set_other_info(
                            info, estimated_coef[submodel]["other_info"][info])
        coefficients.fill_coefficients(coef)

        self.save_predicted_values_and_errors(specification,
                                              coefficients,
                                              dataset,
                                              outcome_variable_name,
                                              index=index,
                                              data_objects=data_objects)

        return (coefficients, estimated_coef)

    def prepare_for_run(self,
                        dataset=None,
                        dataset_filter=None,
                        filter_threshold=0,
                        **kwargs):
        spec, coef = prepare_specification_and_coefficients(**kwargs)
        if (dataset is not None) and (dataset_filter is not None):
            filter_values = dataset.compute_variables(
                [dataset_filter], dataset_pool=self.dataset_pool)
            index = where(filter_values > filter_threshold)[0]
        else:
            index = None
        return (spec, coef, index)

    def prepare_for_estimate(self,
                             dataset=None,
                             dataset_filter=None,
                             filter_threshold=0,
                             **kwargs):
        spec = get_specification_for_estimation(**kwargs)
        if (dataset is not None) and (dataset_filter is not None):
            filter_values = dataset.compute_variables(
                [dataset_filter], dataset_pool=self.dataset_pool)
            index = where(filter_values > filter_threshold)[0]
        else:
            index = None
        return (spec, index)

    def get_data_as_dataset(self, submodel=-2):
        """Like get_all_data, but the retuning value is a Dataset containing attributes that
        correspond to the data columns. Their names are coefficient names."""
        all_data = self.get_all_data(submodel)
        if all_data is None:
            return None
        names = self.get_coefficient_names(submodel)
        if names is None:
            return None
        dataset_data = {}
        for i in range(names.size):
            dataset_data[names[i]] = all_data[:, i].reshape(all_data.shape[0])
        dataset_data["id"] = arange(all_data.shape[0]) + 1
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name='dataset', table_data=dataset_data)
        ds = Dataset(in_storage=storage, id_name="id", in_table_name='dataset')
        return ds

    def save_predicted_values_and_errors(self,
                                         specification,
                                         coefficients,
                                         dataset,
                                         outcome_variable,
                                         index=None,
                                         data_objects=None):
        if self.estimate_config.get('save_predicted_values_and_errors', False):
            logger.log_status('Computing predicted values and residuals.')
            original_values = dataset.get_attribute_by_index(
                outcome_variable, index)
            predicted_values = zeros(dataset.size(), dtype='float32')
            predicted_values[index] = self.run_after_estimation(
                specification,
                coefficients,
                dataset,
                index=index,
                data_objects=data_objects)
            predicted_attribute_name = 'predicted_%s' % outcome_variable.get_alias()
            dataset.add_primary_attribute(name=predicted_attribute_name,
                                          data=predicted_values)
            dataset.flush_attribute(predicted_attribute_name)
            predicted_error_attribute_name = 'residuals_%s' % outcome_variable.get_alias()
            error_values = zeros(dataset.size(), dtype='float32')
            error_values[index] = (original_values -
                                   predicted_values[index]).astype(
                                       error_values.dtype)
            dataset.add_primary_attribute(name=predicted_error_attribute_name,
                                          data=error_values)
            dataset.flush_attribute(predicted_error_attribute_name)
            logger.log_status(
                'Predicted values saved as %s (for the %s dataset)' %
                (predicted_attribute_name, dataset.get_dataset_name()))
            logger.log_status(
                'Residuals saved as %s (for the %s dataset)' %
                (predicted_error_attribute_name, dataset.get_dataset_name()))

    def export_estimation_data(self,
                               submodel=-2,
                               file_name='./estimation_data_regression.txt',
                               delimiter='\t'):
        import os
        from numpy import newaxis
        data = concatenate((self.outcome[submodel][..., newaxis],
                            self.get_all_data(submodel=submodel)),
                           axis=1)
        header = ['outcome'] + self.get_coefficient_names(submodel).tolist()
        nrows = data.shape[0]
        file_name_root, file_name_ext = os.path.splitext(file_name)
        out_file = "%s_submodel_%s.txt" % (file_name_root, submodel)
        fh = open(out_file, 'w')
        fh.write(delimiter.join(header) + '\n')  # file header
        for row in range(nrows):
            line = [str(x) for x in data[row, ]]
            fh.write(delimiter.join(line) + '\n')
        fh.flush()
        fh.close()
        print 'Data written into %s' % out_file

    def run_after_estimation(self, *args, **kwargs):
        return self.run(*args, **kwargs)

    def _get_status_total_pieces(self):
        return ChunkModel._get_status_total_pieces(
            self) * self.get_status_for_gui().get_total_number_of_pieces()

    def _get_status_current_piece(self):
        return ChunkModel._get_status_current_piece(
            self) * self.get_status_for_gui().get_total_number_of_pieces(
            ) + self.get_status_for_gui().get_current_piece()

    def _get_status_piece_description(self):
        return "%s %s" % (ChunkModel._get_status_piece_description(
            self), self.get_status_for_gui().get_current_piece_description())

    def get_specified_coefficients(self):
        return self.specified_coefficients
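
A hedged end-to-end sketch of how this class is typically driven. The dataset, specification, outcome attribute, and the estimation procedure name below are assumptions, not fixed by the code above:

# Hypothetical driver for RegressionModel; names are illustrative.
model = RegressionModel(regression_procedure="opus_core.linear_regression",
                        submodel_string=None,
                        debuglevel=1)
coefficients, estimation_results = model.estimate(
    specification, dataset,
    outcome_attribute="ln_price",                      # hypothetical attribute
    procedure="opus_core.estimate_linear_regression")  # assumed procedure name
predicted = model.run(specification, coefficients, dataset)  # simulate with the estimated coefficients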
Example 18
    def get_resources(self, data_dictionary, dataset):
        """Create resources for computing a variable. """
        resources = Resources()

        for key in data_dictionary.keys():
            if key in self.datasets:
                data = data_dictionary[key]

                storage = StorageFactory().get_storage('dict_storage')

                if self.id_names[key] not in data_dictionary[key].keys() \
                        and not isinstance(self.id_names[key], list):
                    # add a 1-based id array since the dataset has no id column
                    data[self.id_names[key]] = arange(
                        1, len(data_dictionary[key][data_dictionary[key].keys()[0]]) + 1)

                id_name = self.id_names[key]
                storage.write_table(table_name='data', table_data=data)

                if key == "gridcell":
                    gc = GridcellDataset(in_storage=storage,
                                         in_table_name='data')

                    # add relative_x and relative_y
                    gc.get_id_attribute()
                    n = int(ceil(sqrt(gc.size())))
                    if "relative_x" not in data.keys():
                        x = (indices((n, n)) + 1)[1].ravel()
                        gc.add_attribute(x[0:gc.size()],
                                         "relative_x",
                                         metadata=1)
                    if "relative_y" not in data.keys():
                        y = (indices((n, n)) + 1)[0].ravel()
                        gc.add_attribute(y[0:gc.size()],
                                         "relative_y",
                                         metadata=1)
                    resources.merge({key: gc})

                elif key == "household":
                    resources.merge({
                        key:
                        HouseholdDataset(in_storage=storage,
                                         in_table_name='data')
                    })
                elif key == "development_project":
                    resources.merge({key: DevelopmentProjectDataset(in_storage=storage, in_table_name='data')})
                elif key == "development_event":
                    resources.merge({key: DevelopmentEventDataset(in_storage=storage, in_table_name='data')})
                elif key == "neighborhood":
                    resources.merge({key: NeighborhoodDataset(in_storage=storage, in_table_name='data')})
                elif key == "job":
                    resources.merge({key: JobDataset(in_storage=storage, in_table_name='data')})
                elif key == "zone":
                    resources.merge({key: ZoneDataset(in_storage=storage, in_table_name='data')})
                elif key == "travel_data":
                    resources.merge({key: TravelDataDataset(in_storage=storage, in_table_name='data')})
                elif key == "faz":
                    resources.merge({key: FazDataset(in_storage=storage, in_table_name='data')})
                elif key == "fazdistrict":
                    resources.merge({key: FazdistrictDataset(in_storage=storage, in_table_name='data')})
                elif key == "race":
                    resources.merge({key: RaceDataset(in_storage=storage, in_table_name='data')})
                elif key == "county":
                    resources.merge({key: CountyDataset(in_storage=storage, in_table_name='data')})
                elif key == "large_area":
                    resources.merge({key: LargeAreaDataset(in_storage=storage, in_table_name='data')})
                elif key == "development_group":
                    resources.merge({key: DevelopmentGroupDataset(in_storage=storage, in_table_name='data')})
                elif key == "employment_sector_group":
                    resources.merge({key: EmploymentSectorGroupDataset(in_storage=storage, in_table_name='data')})
                elif key == "plan_type_group":
                    resources.merge({key: PlanTypeGroupDataset(in_storage=storage, in_table_name='data')})
                elif key == "building":
                    resources.merge({key: BuildingDataset(in_storage=storage, in_table_name='data')})

            else:
                resources.merge({key: data_dictionary[key]})

        if dataset in self.interactions:
            if dataset == "household_x_gridcell":
                resources.merge({"dataset": HouseholdXGridcellDataset(dataset1=resources["household"],
                                                                      dataset2=resources["gridcell"])})
            elif dataset == "job_x_gridcell":
                resources.merge({"dataset": JobXGridcellDataset(dataset1=resources["job"],
                                                                dataset2=resources["gridcell"])})
            elif dataset == "household_x_zone":
                resources.merge({"dataset": HouseholdXZoneDataset(dataset1=resources["household"],
                                                                  dataset2=resources["zone"])})
            elif dataset == "household_x_neighborhood":
                resources.merge({"dataset": HouseholdXNeighborhoodDataset(dataset1=resources["household"],
                                                                          dataset2=resources["neighborhood"])})
            elif dataset == "development_project_x_gridcell":
                resources.merge({"dataset": DevelopmentProjectXGridcellDataset(dataset1=resources["development_project"],
                                                                               dataset2=resources["gridcell"])})

        else:
            resources.merge({"dataset": resources[dataset]})
        resources.merge({"check_variables": '*', "debug": 4})
        return resources
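The if/elif chain in get_resources grows by one branch per dataset; a table-driven sketch of the same dispatch (the class names are taken from the chain above, but treat the registry layout itself as an illustration, not the project's API):

    # Hypothetical registry equivalent to the elif chain in get_resources.
    DATASET_CLASSES = {
        "development_project": DevelopmentProjectDataset,
        "development_event": DevelopmentEventDataset,
        "neighborhood": NeighborhoodDataset,
        "job": JobDataset,
        "zone": ZoneDataset,
        "travel_data": TravelDataDataset,
        "faz": FazDataset,
        "fazdistrict": FazdistrictDataset,
        "race": RaceDataset,
        "county": CountyDataset,
        "large_area": LargeAreaDataset,
        "development_group": DevelopmentGroupDataset,
        "employment_sector_group": EmploymentSectorGroupDataset,
        "plan_type_group": PlanTypeGroupDataset,
        "building": BuildingDataset,
    }

    if key in DATASET_CLASSES:
        resources.merge({key: DATASET_CLASSES[key](in_storage=storage, in_table_name='data')})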
Esempio n. 19
0
    def run( self, building_set,
#             building_use_table,
             building_use_classification_table,
             vacancy_table,
             history_table,
             year,
             location_set,
             resources=None ):
        building_classes = building_use_classification_table.get_attribute("name")
        unit_attributes = building_use_classification_table.get_attribute('units')
        building_id_name = building_set.get_id_name()[0]
        location_id_name = location_set.get_id_name()[0]
        new_buildings = {building_id_name: array([], dtype='int32'),
                         "building_use_id": array([], dtype=int8),
                         "year_built": array([], dtype='int32'),
#                         "building_sqft": array([], dtype='int32'),
#                         "residential_units": array([], dtype='int32'),
                         "unit_price": array([], dtype=float32),
                         location_id_name: array([], dtype='int32')}
        for attribute in unit_attributes:
            new_buildings[attribute] = array([], dtype='int32')

        max_id = building_set.get_id_attribute().max()
        building_set_size_orig = building_set.size()

        for itype in range(building_use_classification_table.size()): # iterate over building types
            building_class = building_classes[itype]
#            type_code = building_types_table.get_id_attribute()[itype]
            vacancy_attribute = 'target_total_%s_vacancy' % building_class
            if vacancy_attribute not in vacancy_table.get_known_attribute_names():
                logger.log_warning("No target vacancy for building class '%s'. Transition model for this building class skipped." % type)
                continue
            vacancy_table.get_attribute(vacancy_attribute)  # ensures that the attribute is loaded
            target_vacancy_rate = getattr(vacancy_table.get_data_element_by_id(year), vacancy_attribute)

            compute_resources = Resources(resources)
            compute_resources.merge({"debug":self.debug})
            units_attribute = unit_attributes[itype]
            vacant_units_attribute = 'vacant_' + units_attribute

            # determine current-year vacancy rates
            building_set.compute_variables("urbansim_parcel.building." + vacant_units_attribute,
                                           resources = compute_resources)

            vacant_units_sum = building_set.get_attribute(vacant_units_attribute).sum()
            units_sum = float( building_set.get_attribute(units_attribute).sum() )
            vacant_rate = self.safe_divide(vacant_units_sum, units_sum)

            should_develop_units = max( 0, ( target_vacancy_rate * units_sum - vacant_units_sum ) /
                                         ( 1 - target_vacancy_rate ) )
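            # Where this formula comes from: new units enter the stock vacant, so
            # after building D units the vacancy rate is (vacant + D) / (units + D).
            # Setting that equal to the target rate t and solving for D gives
            # D = (t * units - vacant) / (1 - t); negative demand is clamped to 0.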
            logger.log_status(building_class + ": vacant units: %d, should be vacant: %f, sum units: %d"
                          % (vacant_units_sum, target_vacancy_rate * units_sum, units_sum))

            if not should_develop_units:
                logger.log_note(("Will not build any " + building_class + " units, because the current vacancy of %d units\n"
                             + "is more than the %d units desired for the vacancy rate of %f.")
                            % (vacant_units_sum,
                               target_vacancy_rate * units_sum,
                               target_vacancy_rate))
                continue

#            average_buildings_value = None
#            if (type+"_improvement_value") in location_set.get_known_attribute_names():
#                average_buildings_value = self.safe_divide(
#                    location_set.get_attribute(type+"_improvement_value" ).sum(), units_sum)

            #create buildings

            history_values = history_table.get_attribute(units_attribute)
            index_non_zeros_values = where( history_values > 0 )[0]
            history_values_without_zeros = history_values[index_non_zeros_values]
            history_type = history_table.get_attribute("building_use_id")
            history_type_without_zeros = history_type[index_non_zeros_values]
            history_price = history_table.get_attribute("unit_price")
            history_price_without_zeros = history_price[index_non_zeros_values]

            #TODO: what happens if history has only zeroes?
            mean_size = history_values_without_zeros.mean()
            idx = array([], dtype="int32") # indices into the nonzero history records
            # Ensure that there are some development projects to choose from.
            #TODO: should the 'int' in the following line be 'ceil'?
            num_of_projects_to_select = max( 10, int( should_develop_units / mean_size ) )
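            # Sampling loop: draw candidate projects from the history (with
            # replacement), keep the prefix whose cumulative size stays within the
            # target, and keep drawing until the cumulative size reaches the target.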
            while True:
                idx = concatenate( ( idx, randint( 0, history_values_without_zeros.size,
                                                   size= num_of_projects_to_select ) ) )
                csum = history_values_without_zeros[idx].cumsum()
                idx = idx[where( csum <= should_develop_units )]
                if csum[-1] >= should_develop_units:
                    break

            nbuildings = idx.size

            for attribute in unit_attributes:

                #if attribute == units_attribute:
                    #new_unit_values = history_values_without_zeros[idx]
                #else:
                    #new_unit_values = zeros(nbuildings)
                #to accommodate mixed use buildings, allow non units_attribute to be non-zero
                new_unit_values = history_table.get_attribute(attribute)[index_non_zeros_values[idx]]

                new_buildings[attribute] = concatenate((new_buildings[attribute], new_unit_values))

            new_max_id = max_id + nbuildings
            new_buildings[building_id_name] = concatenate((new_buildings[building_id_name], arange(max_id+1, new_max_id+1)))
            max_id = new_max_id # advance so the next building class gets fresh ids
            new_buildings["building_use_id"] = concatenate((new_buildings["building_use_id"], history_type_without_zeros[idx]))
            new_buildings["year_built"] = concatenate((new_buildings["year_built"], year*ones(nbuildings, dtype="int32")))
            new_buildings["unit_price"] = concatenate((new_buildings["unit_price"], history_price_without_zeros[idx]))
            new_buildings[location_id_name] = concatenate((new_buildings[location_id_name], zeros(nbuildings, dtype="int32")))
            logger.log_status("Creating %s %s of %s %s buildings." % (history_values_without_zeros[idx].sum(),
                                                                      units_attribute, nbuildings, building_class))

        building_set.add_elements(new_buildings, require_all_attributes=False)

        difference = building_set.size() - building_set_size_orig
        index = arange(difference) + building_set_size_orig
        return index
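The while loop above (and its twin in the next example) implements sample-until-target. A minimal self-contained sketch of the same technique in plain numpy, with hypothetical names, no Opus dependencies, and the assumption that all sizes are positive:

    from numpy import array, concatenate, where
    from numpy.random import randint

    def sample_to_target(sizes, target, batch=10):
        """Sample indices into `sizes` with replacement until the sampled sizes
        sum to at least `target`; return the prefix of the sample whose
        cumulative size stays within the target."""
        idx = array([], dtype="int32")
        while True:
            # draw another batch of candidate indices
            idx = concatenate((idx, randint(0, sizes.size, size=batch)))
            csum = sizes[idx].cumsum()
            # keep only the prefix that does not overshoot the target
            idx = idx[where(csum <= target)]
            if csum[-1] >= target:
                return idx

    # e.g. sample_to_target(array([120, 80, 200, 50]), target=400)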
Esempio n. 20
0
    def run( self, building_set,
             new_building_copy_attrs,
             building_type_table,
             building_type_classification_table,
             vacancy_table,
             history_table,
             year,
             location_set,
             resources=None ):
        building_classes = building_type_classification_table.get_attribute("name")
        unit_attributes = building_type_classification_table.get_attribute('units')
        building_id_name = building_set.get_id_name()[0]
        location_id_name = location_set.get_id_name()[0]
        calc_attributes = [building_id_name, location_id_name, "year_built"]
        new_buildings   = {}
        for attribute in new_building_copy_attrs:
            new_buildings[attribute] = array([], dtype=building_set.get_data_type(attribute))
        for attribute in calc_attributes:
            new_buildings[attribute] = array([], dtype=building_set.get_data_type(attribute))
            
        # for convenience, make a map of building_type_id => (building_type)class_id
        # these names are hard-wired elsewhere
        building_type_id_to_class_id = {}
        building_type_ids = building_type_table.get_attribute("building_type_id")
        for idx in range(building_type_table.size()):
            building_type_id_to_class_id[building_type_ids[idx]] = \
                building_type_table.get_attribute("class_id")[idx]
        logger.log_status("building_type_id_to_class_id = " + str(building_type_id_to_class_id))
        
        # and make a column of use classes for the history table
        history_type_classes = zeros( (history_table.size()), dtype=int8)
        history_types = history_table.get_attribute("building_type_id")
        for idx in range(history_table.size()):
            history_type_classes[idx] = building_type_id_to_class_id[history_types[idx]]
        logger.log_status("history_types=" + str(history_types))
        logger.log_status("history_type_classes=" + str(history_type_classes))

        max_id = building_set.get_id_attribute().max()
        new_building_id_start = max_id + 1
        new_building_id_end = max_id + 1
        building_set_size_orig = building_set.size()

        for itype in range(building_type_classification_table.size()): # iterate over building types
            building_class = building_classes[itype]
            building_class_id = building_type_classification_table.get_attribute("class_id")[itype]
            
            vacancy_attribute = 'target_total_%s_vacancy' % building_class.lower()
            if vacancy_attribute not in vacancy_table.get_known_attribute_names():
                logger.log_warning("No target vacancy for building class '%s' (e.g. no '%s' in target_vacancies). Transition model for this building class skipped." 
                                   % (building_class,vacancy_attribute)) 
                continue
            vacancy_table.get_attribute(vacancy_attribute)  # ensures that the attribute is loaded
            target_vacancy_rate = getattr(vacancy_table.get_data_element_by_id(year), vacancy_attribute)
            logger.log_status("Target vacancy rate for building_class %s is %f" % (building_class, target_vacancy_rate))

            compute_resources = Resources(resources)
            compute_resources.merge({"debug":self.debug})
            units_attribute         = unit_attributes[itype]
            occupied_sqft_attribute = 'occupied_sqft_of_typeclass_%s' % building_class.lower()
            total_sqft_attribute    = 'where(sanfrancisco.building.building_typeclass_name==\'%s\',sanfrancisco.building.building_sqft,0)' % building_class.lower()

            # determine current-year vacancy rates
            building_set.compute_variables(("sanfrancisco.building." + occupied_sqft_attribute,
                                            total_sqft_attribute), 
                                            resources = compute_resources)

            occupied_sqft_sum   = building_set.get_attribute(occupied_sqft_attribute).sum()
            total_sqft_sum      = float( building_set.get_attribute(total_sqft_attribute).sum() )
            occupancy_rate      = self.safe_divide(occupied_sqft_sum, total_sqft_sum)
            # cap it at 1.0
            if occupancy_rate > 1.0: occupancy_rate = 1.0
            vacancy_rate        = 1.0 - occupancy_rate
            vacant_sqft_sum     = vacancy_rate * total_sqft_sum

            should_develop_sqft = (target_vacancy_rate*total_sqft_sum) - vacant_sqft_sum
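            # Note: unlike the unit-based model in the previous example, this variant
            # does not rescale by 1 / (1 - target_vacancy_rate), i.e. it ignores the
            # fact that the newly built sqft itself enters the stock vacant.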
            logger.log_status("%s: vacancy rate: %4.3f   occupancy rate: %4.3f" 
                              % (building_class, vacancy_rate, occupancy_rate))
            logger.log_status("%s: vacant: %d, should be vacant: %f, sum units: %d"
                          % (building_class, vacant_sqft_sum, target_vacancy_rate*total_sqft_sum, total_sqft_sum))

            if should_develop_sqft <= 0:
                logger.log_note(("Will not build any %s units, because the current vacancy of %d sqft\n"
                             + "is more than the %d sqft desired for the vacancy rate of %f.")
                            % (building_class,
                               vacant_sqft_sum,
                               target_vacancy_rate*total_sqft_sum,
                               target_vacancy_rate))
                continue

            #create buildings

            # find sample set of qualifying buildings in the events history, 
            # e.g. where the building_type is in the correct class, and a positive 
            # number of units or sqft (or whatever) were present
            history_sqft = history_table.get_attribute('building_sqft')
            index_sampleset = where( (history_sqft > 0) & (history_type_classes==building_class_id))[0]

            # Ensure that there are some development projects to choose from.
            logger.log_status("should_develop_sqft=" + str(should_develop_sqft))
            if index_sampleset.shape[0] == 0:
                logger.log_warning("Cannot create new buildings for building use class %s; no buildings in the event history table from which to sample."
                                   % building_class) 
                continue
            
            history_sqft_sampleset = history_sqft[index_sampleset]            
            logger.log_status("history_sqft_sampleset = " + str(history_sqft_sampleset))

            mean_size = history_sqft_sampleset.mean()
            idx = array( [] ,dtype="int32")
            #TODO: should the 'int' in the following line be 'ceil'?
            num_of_projects_to_select = max( 10, int( should_develop_sqft / mean_size ) )
            while True:
                idx = concatenate( ( idx, randint( 0, history_sqft_sampleset.size,
                                                   size=num_of_projects_to_select) ) )
                csum = history_sqft_sampleset[idx].cumsum()
                idx = idx[where( csum <= should_develop_sqft )]
                if csum[-1] >= should_develop_sqft:
                    break
            
            logger.log_status("idx = " + str(idx))

            nbuildings = idx.size
            if nbuildings == 0: continue

            new_building_id_end = new_building_id_start + nbuildings

            # copy_attributes
            for attribute in new_building_copy_attrs:
                attr_values = history_table.get_attribute(attribute)[index_sampleset[idx]]
                new_buildings[attribute] = concatenate((new_buildings[attribute], attr_values))
            
            # calc_attributes
            new_buildings[building_id_name] = concatenate((new_buildings[building_id_name], arange(new_building_id_start, new_building_id_end)))
            # keep the declared dtypes; bare zeros()/ones() would silently promote the arrays to float64
            new_buildings[location_id_name] = concatenate((new_buildings[location_id_name], zeros(nbuildings, dtype=new_buildings[location_id_name].dtype)))
            new_buildings["year_built"] = concatenate((new_buildings["year_built"], year*ones(nbuildings, dtype=new_buildings["year_built"].dtype)))
            logger.log_status("Creating %s sqft of %s %s buildings." % (history_sqft_sampleset[idx].sum(),
                                                                      nbuildings, building_class))
            new_building_id_start = new_building_id_end + 1
            logger.log_status(new_buildings)
        building_set.add_elements(new_buildings, require_all_attributes=False)

        difference = building_set.size() - building_set_size_orig
        index = arange(difference) + building_set_size_orig
        return index
Esempio n. 21
0
    def preprocess_projects(self,
                            agent_set,
                            agents_index=None,
                            data_objects=None):
        """Split projects that don't find enough choices to smaller ones (of average size).
        """
        resources = Resources(data_objects)
        resources.merge({"debug": self.debug})

        self.choice_set.compute_variables(
            [self.developable_maximum_unit_full_name,
             self.developable_minimum_unit_full_name],
            resources=resources)

        max_capacity = self.choice_set.get_attribute(
            self.developable_maximum_unit_short_name)
        min_capacity = self.choice_set.get_attribute(
            self.developable_minimum_unit_short_name)

        self.set_choice_set_size()
        nchoices = self.get_choice_set_size()
        project_average_size = agent_set.get_attribute(
            agent_set.get_attribute_name()).mean()
        add_projects = 0
        remove_projects = 0

        if agents_index is None:
            agents_index = arange(agent_set.size())
        # order agents by size
        ordered_indices = argsort(-1 * agent_set.get_attribute_by_index(
            agent_set.get_attribute_name(), agents_index))
        improvement_values = []
        projects_ids = agent_set.get_id_attribute()[agents_index].tolist()
        #   how many projects fit in each developable location
        project_sizes = agent_set.get_attribute_by_index(
            agent_set.get_attribute_name(), agents_index)
        for iagent in ordered_indices:
            project_size = project_sizes[iagent]
            capacity = logical_and(project_size > min_capacity,
                                   (max_capacity / project_size) > 0)
            if where(capacity)[0].size < nchoices:  # not enough choices found
                nsplitted = int(project_size / project_average_size)
                add_projects += nsplitted
                remove_projects += 1
                projects_ids.remove(
                    agent_set.get_id_attribute()[agents_index[iagent]])
                improvement_values = improvement_values + \
                    nsplitted*[agent_set.get_attribute_by_index("improvement_value", agents_index[iagent])]
            else:
                break  # we can break here, since the projects are sorted by size

        if remove_projects > 0:
            agent_set.remove_elements(
                agents_index[ordered_indices[0:remove_projects]])
            agents_index = agent_set.get_id_index(projects_ids)

        if add_projects > 0:
            max_id = agent_set.get_attribute(agent_set.get_id_name()[0]).max()
            ids = arange(max_id + 1, max_id + 1 + add_projects)
            agent_set.add_elements(data={
                "project_id": ids,
                self.location_set.get_id_name()[0]: zeros((add_projects,)),
                "improvement_value": array(improvement_values),
                agent_set.get_attribute_name(): project_average_size * ones((add_projects,))
            }, require_all_attributes=False)
            agents_index = agent_set.get_id_index(projects_ids + ids.tolist())
        return agents_index # without a return, the recomputed index would be lost to callers
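A minimal sketch of the splitting rule above in plain numpy (hypothetical names, no Opus dependencies): a project that cannot find at least nchoices feasible locations is replaced by int(size / mean_size) average-sized pieces. The model above additionally sorts projects by size so it can stop at the first one that fits:

    from numpy import array

    def split_oversized(sizes, feasible_counts, nchoices, mean_size):
        """Return (kept_sizes, new_pieces). Projects with enough feasible
        locations are kept; the rest become average-sized pieces."""
        kept, pieces = [], []
        for size, nfeasible in zip(sizes, feasible_counts):
            if nfeasible >= nchoices:
                kept.append(size)
            else:
                # replace the oversized project with average-sized pieces
                pieces.extend([mean_size] * int(size / mean_size))
        return array(kept), array(pieces)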
Esempio n. 22
0
class Estimator(GenericModelExplorer):
    def __init__(self, config=None, save_estimation_results=False):
        if 'cache_directory' not in config or config['cache_directory'] is None:
            raise KeyError("The cache directory must be specified in the "
                "given configuration, giving the filesystem path to the cache "
                "directory containing the data with which to estimate. Please "
                "check that your configuration contains the 'cache_directory' "
                "entry and that it is not None.")

        self.simulation_state = SimulationState(new_instance=True)
        self.simulation_state.set_cache_directory(config['cache_directory'])

        SessionConfiguration(new_instance=True,
                             package_order=config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        self.config = Resources(config)
        self.save_estimation_results = save_estimation_results
        self.debuglevel = self.config.get("debuglevel", 4)
        self.model_system = ModelSystem()
        self.agents_index_for_prediction = None
        
        models = self.config.get('models',[])

        self.model_name = None
        if "model_name" in config.keys():
            self.model_name = config["model_name"]
        else:
            for model in models:
                if isinstance(model, dict):
                    model_name = model.keys()[0]
                    if (model[model_name] == "estimate") or (isinstance(model[model_name], list)
                        and ("estimate" in model[model_name])):
                            self.model_name = model_name
                            break
        estimate_config_changes = self.config.get('config_changes_for_estimation', {}).get('estimate_config', {})
        if len(estimate_config_changes) > 0:
            change = Resources({'models_configuration': {self.model_name: {'controller': {'init': {'arguments': {}}}}}})
            estimate_config_str = self.config['models_configuration'].get(self.model_name, {}).get('controller', {}).get('init', {}).get('arguments', {}).get('estimate_config', '{}')
            estimate_config = Resources({})
            try:
                estimate_config = eval(estimate_config_str)
            except Exception:
                pass # fall back to an empty Resources if the stored string does not evaluate
 
            estimate_config.merge(estimate_config_changes)
            self.config.merge(change)
            self.config['models_configuration'][self.model_name]['controller']['init']['arguments']['estimate_config'] = 'Resources(%s)' % estimate_config
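            # The controller keeps estimate_config as a source string that is
            # eval'ed later, so the merged dictionary is serialized back into a
            # 'Resources(...)' literal here.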

            
       
    def estimate(self, out_storage=None):
        self.model_system.run(self.config, write_datasets_to_cache_at_end_of_year=False)
        self.extract_coefficients_and_specification()

        if self.save_estimation_results:
            self.save_results(out_storage=out_storage)

    def reestimate(self, specification_module_name=None, specification_dict=None, out_storage=None, type=None, submodels=None):
        """specification_module_name is name of a module that contains a dictionary called
        'specification'. If it is not given, the argument specification_dict must be given which is a dictionary object.
        'type' is the name of model member, such as 'commercial', 'residential'. The specification dictionary
        is expected to have an entry of this name. If 'submodels' is given (list or a number),
        the restimation is done only for those submodels.
        """
        if specification_module_name is not None:
            exec("import " + specification_module_name)
            eval("reload (" + specification_module_name + ")")
            exec("specification_dict =" + specification_module_name + ".specification")
            
        if type is not None:
            specification_dict = specification_dict[type]
        if submodels is not None: #remove all submodels but the given ones from specification
            submodels_to_be_deleted = specification_dict.keys()
            if not isinstance(submodels, list):
                submodels = [submodels]
            for sm in submodels:
                if sm not in submodels_to_be_deleted:
                    raise ValueError, "Submodel %s not in the specification." % sm
                submodels_to_be_deleted.remove(sm)
                if "_definition_" in submodels_to_be_deleted:
                    submodels_to_be_deleted.remove("_definition_")
            for sm in submodels_to_be_deleted:
                del specification_dict[sm]
        self.specification = EquationSpecification(specification_dict=specification_dict)
        new_namespace = self.model_system.run_year_namespace
        keys_coeff_spec = self.get_keys_for_coefficients_and_specification()
        new_namespace[keys_coeff_spec["specification"]] = self.specification
        self.coefficients, coeff_dict_dummy = self.model_system.do_process(new_namespace)
        ## update run_year_namespace since it has not been updated by do_process
        self.model_system.run_year_namespace = new_namespace
        self.model_system.run_year_namespace[keys_coeff_spec["coefficients"]] = self.coefficients
        
        ## this gets coeff and spec from run_year_namespace and is only updated in the _run_year method
        #self.extract_coefficients_and_specification()  
        if self.save_estimation_results:
            self.save_results(out_storage=out_storage)

    def predict(self, predicted_choice_id_name, agents_index=None):
        """ Run prediction. Currently makes sense only for choice models."""
        # Create temporary configuration where all words 'estimate' are replaced by 'run'
        tmp_config = Resources(self.config)
        
        if self.agents_index_for_prediction is None:
            self.agents_index_for_prediction = self.get_agent_set_index().copy()
            
        if agents_index is None:
            agents_index = self.agents_index_for_prediction
        
        tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['coefficients'] = "coeff_est"
        tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['agents_index'] = "agents_index"
        tmp_config['models_configuration'][self.model_name]['controller']['run']['arguments']['chunk_specification'] = "{'nchunks':1}"

        ### save specification and coefficients to cache (no matter the save_estimation_results flag)
        ### so that the prepare_for_run method could load specification and coefficients from there
        #output_configuration = self.config['output_configuration']
        #del self.config['output_configuration']
        #self.save_results()
        
        #self.config['output_configuration'] = output_configuration
        
        #self.model_system.run_year_namespace["coefficients"] = self.coefficients
        #del tmp_config['models_configuration'][self.model_name]['controller']['prepare_for_run']
        
        try:
            run_year_namespace = copy.copy(self.model_system.run_year_namespace)
        except:
            logger.log_error("The estimate() method must be run first")
            return False
        
        try:
            agents = self.get_agent_set()
            choice_id_name = self.get_choice_set().get_id_name()[0]
            # save current locations of agents
            current_choices = agents.get_attribute(choice_id_name).copy()
            dummy_data = zeros(current_choices.size, dtype=current_choices.dtype)-1
            agents.modify_attribute(name=choice_id_name, data=dummy_data) #reset all choices
            
            run_year_namespace["process"] = "run"
            run_year_namespace["coeff_est"] = self.coefficients
            run_year_namespace["agents_index"] = agents_index
            run_year_namespace["processmodel_config"] = tmp_config['models_configuration'][self.model_name]['controller']['run']
            new_choices = self.model_system.do_process(run_year_namespace)
            
            #self.model_system.run(tmp_config, write_datasets_to_cache_at_end_of_year=False)
            #new_choices = agents.get_attribute(choice_id_name).copy()
            agents.modify_attribute(name=choice_id_name, data=current_choices)
            dummy_data[agents_index] = new_choices
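            # dummy_data now holds the predicted choice for agents in agents_index
            # and -1 for all other agents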
            if predicted_choice_id_name not in agents.get_known_attribute_names():
                agents.add_primary_attribute(name=predicted_choice_id_name, data=dummy_data)
            else:
                agents.modify_attribute(name=predicted_choice_id_name, data=dummy_data)
            logger.log_status("Predictions saved into attribute " + predicted_choice_id_name)
            return True
        except Exception, e:
            logger.log_error("Error encountered in prediction: %s" % e)
            logger.log_stack_trace()
        
        return False
class RegressionModel(ChunkModel):

    model_name = "Regression Model"
    model_short_name = "RM"

    def __init__(self, regression_procedure="opus_core.linear_regression",
                  submodel_string=None,
                  run_config=None, estimate_config=None, debuglevel=0, dataset_pool=None):
 
        self.debug = DebugPrinter(debuglevel)

        self.dataset_pool = self.create_dataset_pool(dataset_pool)

        self.regression = RegressionModelFactory().get_model(name=regression_procedure)
        if self.regression is None:
            raise StandardError, "No regression procedure given."

        self.submodel_string = submodel_string

        self.run_config = run_config
        if self.run_config is None:
            self.run_config = Resources()
        if not isinstance(self.run_config, Resources) and isinstance(self.run_config, dict):
            self.run_config = Resources(self.run_config)

        self.estimate_config = estimate_config
        if self.estimate_config is None:
            self.estimate_config = Resources()
        if not isinstance(self.estimate_config, Resources) and isinstance(self.estimate_config, dict):
            self.estimate_config = Resources(self.estimate_config)
            
        self.data = {}
        self.coefficient_names = {}
        ChunkModel.__init__(self)
        self.get_status_for_gui().initialize_pieces(3, pieces_description = array(['initialization', 'computing variables', 'submodel: 1']))

    def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
            data_objects=None, run_config=None, initial_values=None, procedure=None, debuglevel=0):
        """'specification' is of type EquationSpecification,
            'coefficients' is of type Coefficients,
            'dataset' is of type Dataset,
            'index' are indices of individuals in dataset for which
                        the model runs. If it is None, the whole dataset is considered.
            'chunk_specification' determines  number of chunks in which the simulation is processed.
            'data_objects' is a dictionary where each key is the name of a data object
            ('zone', ...) and its value is an object of class Dataset.
           'run_config' is of type Resources, it gives additional arguments for the run.
           If 'procedure' is given, it overwrites the regression_procedure of the constructor.
           'initial_values' is an array of the initial values of the results. It will be overwritten
           by the results for those elements that are handled by the model (defined by submodels in the specification).
           By default the results are initialized with 0.
            'debuglevel' overwrites the constructor 'debuglevel'.
        """
        self.debug.flag = debuglevel
        if run_config is None:
            run_config = Resources()
        if not isinstance(run_config, Resources) and isinstance(run_config, dict):
            run_config = Resources(run_config)
        self.run_config = run_config.merge_with_defaults(self.run_config)
        self.run_config.merge({"debug":self.debug})
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.replace_dataset(dataset.get_dataset_name(), dataset)
        if procedure is not None: 
            self.regression = RegressionModelFactory().get_model(name=procedure)
        if initial_values is None:
            self.initial_values = zeros((dataset.size(),), dtype=float32)
        else:
            self.initial_values = zeros((dataset.size(),), dtype=initial_values.dtype)
            self.initial_values[index] = initial_values
            
        if dataset.size() <= 0: # no data loaded yet
            dataset.get_id_attribute()
        if index is None:
            index = arange(dataset.size())
            
        result = ChunkModel.run(self, chunk_specification, dataset, index, float32,
                                 specification=specification, coefficients=coefficients)
        return result

    def run_chunk (self, index, dataset, specification, coefficients):
        self.specified_coefficients = SpecifiedCoefficients().create(coefficients, specification, neqs=1)
        compute_resources = Resources({"debug":self.debug})
        submodels = self.specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(submodels, self.submodel_string, dataset, index,
                                      dataset_pool=self.dataset_pool, resources = compute_resources)
        variables = self.specified_coefficients.get_full_variable_names_without_constants()
        self.debug.print_debug("Compute variables ...",4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables, dataset_pool = self.dataset_pool, resources = compute_resources)
        data = {}
        coef = {}
        outcome=self.initial_values[index].copy()
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(self.specified_coefficients,submodel)
            self.coefficient_names[submodel] = coef[submodel].get_coefficient_names_without_constant()[0,:]
            self.debug.print_debug("Compute regression for submodel " +str(submodel),4)
            self.increment_current_status_piece()
            self.data[submodel] = dataset.create_regression_data(coef[submodel],
                                                                index = index[self.observations_mapping[submodel]])
            nan_index = where(isnan(self.data[submodel]))[1]
            inf_index = where(isinf(self.data[submodel]))[1]
            vnames = asarray(coef[submodel].get_variable_names())
            if nan_index.size > 0:
                nan_var_index = unique(nan_index)
                self.data[submodel] = nan_to_num(self.data[submodel])
                logger.log_warning("NaN(Not A Number) is returned from variable %s; it is replaced with %s." % (vnames[nan_var_index], nan_to_num(nan)))
                #raise ValueError, "NaN(Not A Number) is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[nan_var_index]
            if inf_index.size > 0:
                inf_var_index = unique(inf_index)
                self.data[submodel] = nan_to_num(self.data[submodel])
                logger.log_warning("Inf is returned from variable %s; it is replaced with %s." % (vnames[inf_var_index], nan_to_num(inf)))
                #raise ValueError, "Inf is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[inf_var_index]
            
            if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0): # observations for this submodel available
                outcome[self.observations_mapping[submodel]] = \
                    self.regression.run(self.data[submodel], coef[submodel].get_coefficient_values()[0,:],
                        resources=self.run_config).astype(outcome.dtype)
        return outcome

    def correct_infinite_values(self, dataset, outcome_attribute_name, maxvalue=1e+38, clip_all_larger_values=False):
        """Check if the model resulted in infinite values. If yes,
        print warning and clip the values to maxvalue. 
        If clip_all_larger_values is True, all values larger than maxvalue are clipped to maxvalue.
        """
        infidx = where(dataset.get_attribute(outcome_attribute_name) == inf)[0]

        if infidx.size > 0:
            logger.log_warning("Infinite values in %s. Clipped to %s." % (outcome_attribute_name, maxvalue))
            dataset.set_values_of_one_attribute(outcome_attribute_name, maxvalue, infidx)
        if clip_all_larger_values:
            idx = where(dataset.get_attribute(outcome_attribute_name) > maxvalue)[0]
            if idx.size > 0:
                logger.log_warning("Values in %s larger than %s. Clipped to %s." % (outcome_attribute_name, maxvalue, maxvalue))
                dataset.set_values_of_one_attribute(outcome_attribute_name, maxvalue, idx)
            
    def estimate(self, specification, dataset, outcome_attribute, index = None, procedure=None, data_objects=None,
                        estimate_config=None,  debuglevel=0):
        """'specification' is of type EquationSpecification,
            'dataset' is of type Dataset,
            'outcome_attribute' - string that determines the dependent variable,
            'index' are indices of individuals in dataset for which
                    the model runs. If it is None, the whole dataset is considered.
            'procedure' - name of the estimation procedure. If it is None,
                there should be an entry "estimation" in 'estimate_config' that determines the procedure. The class
                must have a method 'run' that takes as arguments 'data', 'regression_procedure' and 'resources'.
                It returns a dictionary with entries 'estimators', 'standard_errors' and 't_values' (all 1D numpy arrays).
            'data_objects' is a dictionary where each key is the name of a data object
                    ('zone', ...) and its value is an object of class Dataset.
            'estimate_config' is of type Resources, it gives additional arguments for the estimation procedure.
            'debuglevel' overwrites the class 'debuglevel'.
        """
        #import wingdbstub
        self.debug.flag = debuglevel
        if estimate_config is None:
            estimate_config = Resources()
        if not isinstance(estimate_config, Resources) and isinstance(estimate_config, dict):
            estimate_config = Resources(estimate_config)
        self.estimate_config = estimate_config.merge_with_defaults(self.estimate_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.procedure = procedure
        if self.procedure is None:
            self.procedure = self.estimate_config.get("estimation", None)
        if self.procedure is not None:
            self.procedure = ModelComponentCreator().get_model_component(self.procedure)
        else:
            logger.log_warning("No estimation procedure given, or problems with loading the corresponding module.")

        compute_resources = Resources({"debug":self.debug})
        if dataset.size() <= 0: # no data loaded yet
            dataset.get_id_attribute()
        if index is None:
            index = arange(dataset.size())
        if not isinstance(index, ndarray):
            index = array(index)

        estimation_size_agents = self.estimate_config.get("estimation_size_agents", None) # should be a proportion of the agent_set
        if estimation_size_agents is None:
            estimation_size_agents = 1.0
        else:
            estimation_size_agents = max(min(estimation_size_agents, 1.0), 0.0) # clamp between 0 and 1

        if estimation_size_agents < 1.0:
            self.debug.print_debug("Sampling agents for estimation ...",3)
            estimation_idx = sample_noreplace(arange(index.size),
                                                         int(index.size*estimation_size_agents))
        else:
            estimation_idx = arange(index.size)

        estimation_idx = index[estimation_idx]
        self.debug.print_debug("Number of observations for estimation: " + str(estimation_idx.size),2)
        if estimation_idx.size <= 0:
            self.debug.print_debug("Nothing to be done.",2)
            return (None, None)

        coefficients = create_coefficient_from_specification(specification)
        self.specified_coefficients = SpecifiedCoefficients().create(coefficients, specification, neqs=1)
        submodels = self.specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(submodels, self.submodel_string, dataset, estimation_idx,
                                      dataset_pool=self.dataset_pool, resources = compute_resources,
                                      submodel_size_max=self.estimate_config.get('submodel_size_max', None))
        variables = self.specified_coefficients.get_full_variable_names_without_constants()
        self.debug.print_debug("Compute variables ...",4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables, dataset_pool=self.dataset_pool, resources = compute_resources)

        coef = {}
        estimated_coef={}
        self.outcome = {}
        dataset.compute_variables([outcome_attribute], dataset_pool=self.dataset_pool, resources=compute_resources)
        regression_resources=Resources(estimate_config)
        regression_resources.merge({"debug":self.debug})
        outcome_variable_name = VariableName(outcome_attribute)
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(self.specified_coefficients,submodel)
            self.increment_current_status_piece()
            logger.log_status("Estimate regression for submodel " +str(submodel),
                               tags=["estimate"], verbosity_level=2)
            #logger.log_status("Number of observations: " +str(self.observations_mapping[submodel].size),
                               #tags=["estimate"], verbosity_level=2)
            self.data[submodel] = dataset.create_regression_data_for_estimation(coef[submodel],
                                                            index = estimation_idx[self.observations_mapping[submodel]])
            self.coefficient_names[submodel] = coef[submodel].get_coefficient_names_without_constant()[0,:]
            if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0) and (self.procedure is not None): # observations for this submodel available
                self.outcome[submodel] = dataset.get_attribute_by_index(outcome_variable_name.get_alias(), estimation_idx[self.observations_mapping[submodel]])   
                regression_resources.merge({"outcome":  self.outcome[submodel]})
                regression_resources.merge({"coefficient_names":self.coefficient_names[submodel].tolist(),
                            "constant_position": coef[submodel].get_constants_positions()})
                regression_resources.merge({"submodel": submodel})
                estimated_coef[submodel] = self.procedure.run(self.data[submodel], self.regression,
                                                        resources=regression_resources)
                if "estimators" in estimated_coef[submodel].keys():
                    coef[submodel].set_coefficient_values(estimated_coef[submodel]["estimators"])
                if "standard_errors" in estimated_coef[submodel].keys():
                    coef[submodel].set_standard_errors(estimated_coef[submodel]["standard_errors"])
                if "other_measures" in estimated_coef[submodel].keys():
                    for measure in estimated_coef[submodel]["other_measures"].keys():
                        coef[submodel].set_measure(measure,
                              estimated_coef[submodel]["other_measures"][measure])
                if "other_info" in estimated_coef[submodel].keys():
                    for info in estimated_coef[submodel]["other_info"]:
                        coef[submodel].set_other_info(info,
                              estimated_coef[submodel]["other_info"][info])
        coefficients.fill_coefficients(coef)
        self.specified_coefficients.coefficients = coefficients
        self.save_predicted_values_and_errors(specification, coefficients, dataset, outcome_variable_name, index=index, data_objects=data_objects)
            
        return (coefficients, estimated_coef)

    def prepare_for_run(self, dataset=None, dataset_filter=None, filter_threshold=0, **kwargs):
        spec, coef = prepare_specification_and_coefficients(**kwargs)
        if (dataset is not None) and (dataset_filter is not None):
            filter_values = dataset.compute_variables([dataset_filter], dataset_pool=self.dataset_pool)
            index = where(filter_values > filter_threshold)[0]
        else:
            index = None
        return (spec, coef, index)

    def prepare_for_estimate(self, dataset=None, dataset_filter=None, filter_threshold=0, **kwargs):
        spec = get_specification_for_estimation(**kwargs)
        if (dataset is not None) and (dataset_filter is not None):
            filter_values = dataset.compute_variables([dataset_filter], dataset_pool=self.dataset_pool)
            index = where(filter_values > filter_threshold)[0]
        else:
            index = None
        return (spec, index)
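    # A hedged usage sketch (dataset construction elided; my_dataset, the filter
    # expression and the specification dictionary are hypothetical) of the
    # prepare/estimate/run cycle these methods support:
    #
    #     model = RegressionModel(regression_procedure="opus_core.linear_regression")
    #     spec, index = model.prepare_for_estimate(dataset=my_dataset,
    #                       dataset_filter="my_package.my_dataset.is_valid",
    #                       specification_dict=my_specification)
    #     coefficients, _ = model.estimate(spec, my_dataset,
    #                       "my_package.my_dataset.outcome", index=index)
    #     predicted = model.run(spec, coefficients, my_dataset)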
    
    def get_data_as_dataset(self, submodel=-2):
        """Like get_all_data, but the retuning value is a Dataset containing attributes that
        correspond to the data columns. Their names are coefficient names."""
        all_data = self.get_all_data(submodel)
        if all_data is None:
            return None
        names = self.get_coefficient_names(submodel)
        if names is None:
            return None
        dataset_data = {}
        for i in range(names.size):
            dataset_data[names[i]] = all_data[:, i].reshape(all_data.shape[0])
        dataset_data["id"] = arange(all_data.shape[0])+1
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name='dataset', table_data=dataset_data)
        ds = Dataset(in_storage=storage, id_name="id", in_table_name='dataset')
        return ds

    def save_predicted_values_and_errors(self, specification, coefficients, dataset, outcome_variable, index=None, data_objects=None):
        if self.estimate_config.get('save_predicted_values_and_errors', False):
            logger.log_status('Computing predicted values and residuals.')
            original_values = dataset.get_attribute_by_index(outcome_variable, index)
            predicted_values = zeros(dataset.size(), dtype='float32')
            predicted_values[index] = self.run_after_estimation(specification, coefficients, dataset, index=index, data_objects=data_objects)
            predicted_attribute_name = 'predicted_%s' % outcome_variable.get_alias()
            dataset.add_primary_attribute(name=predicted_attribute_name, data=predicted_values)
            dataset.flush_attribute(predicted_attribute_name)
            predicted_error_attribute_name = 'residuals_%s' % outcome_variable.get_alias()
            error_values = zeros(dataset.size(), dtype='float32')
            error_values[index] = (original_values - predicted_values[index]).astype(error_values.dtype)
            dataset.add_primary_attribute(name=predicted_error_attribute_name, data = error_values)
            dataset.flush_attribute(predicted_error_attribute_name)
            logger.log_status('Predicted values saved as %s (for the %s dataset)' % (predicted_attribute_name, dataset.get_dataset_name()))
            logger.log_status('Residuals saved as %s (for the %s dataset)' % (predicted_error_attribute_name, dataset.get_dataset_name()))
        
    def export_estimation_data(self, submodel=-2, file_name='./estimation_data_regression.txt', delimiter = '\t'):
        import os
        from numpy import newaxis
        data = concatenate((self.outcome[submodel][...,newaxis], self.get_all_data(submodel=submodel)), axis=1)
        header = ['outcome'] + self.get_coefficient_names(submodel).tolist()
        nrows = data.shape[0]
        file_name_root, file_name_ext = os.path.splitext(file_name)
        out_file = "%s_submodel_%s.txt" % (file_name_root, submodel)
        fh = open(out_file,'w')
        fh.write(delimiter.join(header) + '\n')   #file header
        for row in range(nrows):
            line = [str(x) for x in data[row, :]]
            fh.write(delimiter.join(line) + '\n')
        fh.flush()
        fh.close()
        print 'Data written into %s' % out_file
        
    def run_after_estimation(self, *args, **kwargs):
        return self.run(*args, **kwargs)
            
    def _get_status_total_pieces(self):
        return ChunkModel._get_status_total_pieces(self) * self.get_status_for_gui().get_total_number_of_pieces()
    
    def _get_status_current_piece(self):
        return ChunkModel._get_status_current_piece(self)*self.get_status_for_gui().get_total_number_of_pieces() + self.get_status_for_gui().get_current_piece()
        
    def _get_status_piece_description(self):
        return "%s %s" % (ChunkModel._get_status_piece_description(self), self.get_status_for_gui().get_current_piece_description())
    
    def get_specified_coefficients(self):
        return self.specified_coefficients

    def run( self, building_set, building_types_table, vacancy_table, year, location_set,
            building_categories=None, dataset_pool=None, resources=None ):
        building_types = building_types_table.get_attribute("name")
        building_id_name = building_set.get_id_name()[0]
        location_id_name = location_set.get_id_name()[0]
        new_buildings = {building_id_name: array([], dtype=building_set.get_data_type(building_id_name)),
                         "building_type_id":array([], dtype=building_set.get_data_type("building_type_id", int8)),
                         "year_built": array([], dtype=building_set.get_data_type("year_built", int32)),
                         "sqft": array([], dtype=building_set.get_data_type("sqft", int32)),
                         "residential_units": array([], dtype=building_set.get_data_type("residential_units", int32)),
                         "improvement_value": array([], dtype= building_set.get_data_type("improvement_value", float32)),
                         "land_value": array([], dtype= building_set.get_data_type("land_value", float32)),
                         location_id_name: array([], dtype=building_set.get_data_type(location_id_name, int32))}
        max_id = building_set.get_id_attribute().max()
        buildings_set_size_orig = building_set.size()

        for itype in range(building_types_table.size()): # iterate over building types
            type = building_types[itype]
            type_code = building_types_table.get_id_attribute()[itype]
            is_residential = building_types_table.get_attribute("is_residential")[itype]
            vacancy_attribute = 'target_total_%s_vacancy' % type
            if vacancy_attribute not in vacancy_table.get_known_attribute_names():
                logger.log_warning("No target vacancy for building type '%s'. Transition model for this building type skipped." % type)
                continue
            vacancy_table.get_attribute(vacancy_attribute)  # ensures that the attribute is loaded
            target_vacancy_rate = eval("vacancy_table.get_data_element_by_id( year ).%s" % vacancy_attribute)

            compute_resources = Resources(resources)
            compute_resources.merge({"debug":self.debug})
            units_attribute = building_types_table.get_attribute('units')[itype]

            # determine current-year vacancy rates
            if is_residential:
                default_vacancy_variable = "urbansim.%s.vacant_%s_units_from_buildings" % (
                                                                   location_set.get_dataset_name(), type)
            else:
                default_vacancy_variable = "urbansim.%s.vacant_%s_sqft_from_buildings" % (
                                                                   location_set.get_dataset_name(), type)
            variable_for_vacancy = compute_resources.get(
                                    "%s_vacant_variable" % type, default_vacancy_variable)
            location_set.compute_variables([variable_for_vacancy, "urbansim.%s.buildings_%s_space" % (
                                                                      location_set.get_dataset_name(),type)],
                                        dataset_pool=dataset_pool, resources = compute_resources)

            vacant_units_sum = location_set.get_attribute(variable_for_vacancy).sum()
            units_sum = float( location_set.get_attribute("buildings_%s_space" % type).sum() )
            vacant_rate = self.safe_divide(vacant_units_sum, units_sum)

            should_develop_units = int(round(max( 0, ( target_vacancy_rate * units_sum - vacant_units_sum ) /
                                         ( 1 - target_vacancy_rate ) )))
            logger.log_status(type + ": vacant units: %d, should be vacant: %f, sum units: %d"
                          % (vacant_units_sum, target_vacancy_rate * units_sum, units_sum))

            if not should_develop_units:
                logger.log_note(("Will not build any " + type + " units, because the current vacancy of %d units\n"
                             + "is more than the %d units desired for the vacancy rate of %f.")
                            % (vacant_units_sum,
                               target_vacancy_rate * units_sum,
                               target_vacancy_rate))
                continue

            improvement_value = building_set.compute_variables("urbansim.%s.%s_improvement_value" % (
                                                                     building_set.get_dataset_name(), type),
                                                                   dataset_pool=dataset_pool,
                                                                   resources=compute_resources)
            average_improvement_value = improvement_value.sum() / units_sum

            #create buildings
            is_building_type = building_set.compute_variables("urbansim.building.is_building_type_%s" % type,
                                                              dataset_pool=dataset_pool,
                                                              resources=compute_resources)
            units_of_this_type = building_set.compute_variables(units_attribute, dataset_pool=dataset_pool,
                                           resources=compute_resources)
            units_of_this_type = units_of_this_type*is_building_type
            units_without_zeros_idx = where(units_of_this_type > 0)[0]
            history_values_without_zeros = units_of_this_type[units_without_zeros_idx]
            history_improvement_values_without_zeros = where(improvement_value[units_without_zeros_idx]>0,
                                                             improvement_value[units_without_zeros_idx],
                                                             average_improvement_value)
            mean_size = history_values_without_zeros.mean()
            idx = array( [], dtype="int32" )
            # Ensure that there are some development projects to choose from.
            num_of_projects_to_select = max( 10, int( should_develop_units / mean_size ) )
            while True:
                idx = concatenate( ( idx, randint( 0, history_values_without_zeros.size,
                                                   size=num_of_projects_to_select) ) )
                csum = history_values_without_zeros[idx].cumsum()
                idx = idx[where( csum <= should_develop_units )]
                if csum[-1] >= should_develop_units:
                    break
            nbuildings = idx.size
            new_buildings["building_type_id"] = concatenate((new_buildings["building_type_id"], type_code*ones(nbuildings)))
            new_buildings["year_built"] = concatenate((new_buildings["year_built"], year*ones(nbuildings)))
            new_max_id = max_id + nbuildings
            new_buildings[building_id_name]=concatenate((new_buildings[building_id_name], arange(max_id+1, new_max_id+1)))
            max_id = new_max_id
            new_buildings["improvement_value"] = concatenate((new_buildings["improvement_value"],
                                                              history_improvement_values_without_zeros[idx]))

            if is_residential:
                target_size_attribute = "residential_units"
                zero_attribute = "sqft"
            else:
                target_size_attribute = "sqft"
                zero_attribute = "residential_units"
            new_buildings[target_size_attribute] = concatenate((new_buildings[target_size_attribute], history_values_without_zeros[idx]))
            new_buildings[zero_attribute] = concatenate((new_buildings[zero_attribute], zeros(nbuildings)))
            new_buildings[location_id_name] = concatenate((new_buildings[location_id_name], zeros(nbuildings)))
            new_buildings["land_value"] = concatenate((new_buildings["land_value"], zeros(nbuildings)))
            logger.log_status("Creating %s %s of %s %s buildings." % (history_values_without_zeros[idx].sum(),
                                                                   target_size_attribute, nbuildings, type))

        building_set.add_elements(new_buildings, require_all_attributes=False)
        if building_categories: # should be a dictionary of categories for each building type
            building_set.resources['building_categories'] = building_categories
        # add submodel attribute
        category_variables = ["urbansim.%s.size_category_%s" % (building_set.get_dataset_name(), btype)
                              for btype in building_types]

        for category_var in category_variables:
            var = VariableName(category_var)
            if var.get_alias() in building_set.get_known_attribute_names():
                building_set.delete_one_attribute(var)
            building_set.compute_variables(var, dataset_pool=dataset_pool, resources = compute_resources)
            building_set.add_primary_attribute(building_set.get_attribute(var), var.get_alias())

        difference = building_set.size() - buildings_set_size_orig
        return difference
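
The development target above solves target_rate = (vacant + new) / (units + new) for new,
giving new = (target_rate * units - vacant) / (1 - target_rate). A quick numeric check of
that formula (the figures are made up, not from any dataset):

units_sum, vacant_units_sum, target_rate = 1000.0, 30.0, 0.05
new_units = max(0, (target_rate * units_sum - vacant_units_sum) / (1 - target_rate))
# new_units ~= 21.05; afterwards (30 + 21.05) / (1000 + 21.05) ~= 0.05, the target rate
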
Esempio n. 26
0
    def run(self,
            building_set,
            new_building_copy_attrs,
            building_type_table,
            building_type_classification_table,
            vacancy_table,
            history_table,
            year,
            location_set,
            resources=None):
        building_classes = building_type_classification_table.get_attribute(
            "name")
        unit_attributes = building_type_classification_table.get_attribute(
            'units')
        building_id_name = building_set.get_id_name()[0]
        location_id_name = location_set.get_id_name()[0]
        calc_attributes = [building_id_name, location_id_name, "year_built"]
        new_buildings = {}
        for attribute in new_building_copy_attrs:
            new_buildings[attribute] = array(
                [], dtype=building_set.get_data_type(attribute))
        for attribute in calc_attributes:
            new_buildings[attribute] = array(
                [], dtype=building_set.get_data_type(attribute))

        # for convenience, make a map of building_type_id => (building_type)class_id
        # these names are hard-wired elsewhere
        building_type_id_to_class_id = {}
        building_type_ids = building_type_table.get_attribute(
            "building_type_id")
        for idx in range(building_type_table.size()):
            building_type_id_to_class_id[building_type_ids[idx]] = \
                building_type_table.get_attribute("class_id")[idx]
        logger.log_status("building_type_id_to_class_id = " +
                          str(building_type_id_to_class_id))

        # and make a column of the use classes for the history table
        history_type_classes = zeros((history_table.size()), dtype=int8)
        history_types = history_table.get_attribute("building_type_id")
        for idx in range(history_table.size()):
            history_type_classes[idx] = building_type_id_to_class_id[
                history_types[idx]]
        logger.log_status("history_types=" + str(history_types))
        logger.log_status("history_type_classes=" + str(history_type_classes))

        max_id = building_set.get_id_attribute().max()
        new_building_id_start = max_id + 1
        new_building_id_end = max_id + 1
        building_set_size_orig = building_set.size()

        for itype in range(building_type_classification_table.size()):  # iterate over building types
            building_class = building_classes[itype]
            building_class_id = building_type_classification_table.get_attribute(
                "class_id")[itype]

            vacancy_attribute = 'target_total_%s_vacancy' % building_class.lower()
            if vacancy_attribute not in vacancy_table.get_known_attribute_names():
                logger.log_warning(
                    "No target vacancy for building class '%s' (e.g. no '%s' in target_vacancies). Transition model for this building class skipped."
                    % (building_class, vacancy_attribute))
                continue
            vacancy_table.get_attribute(vacancy_attribute)  # ensures that the attribute is loaded
            target_vacancy_rate = getattr(vacancy_table.get_data_element_by_id(year), vacancy_attribute)
            logger.log_status(
                "Target vacancy rate for building_class %s is %f" %
                (building_class, target_vacancy_rate))

            compute_resources = Resources(resources)
            compute_resources.merge({"debug": self.debug})
            units_attribute = unit_attributes[itype]
            occupied_sqft_attribute = 'occupied_sqft_of_typeclass_%s' % building_class.lower()
            total_sqft_attribute = "where(sanfrancisco.building.building_typeclass_name=='%s',sanfrancisco.building.building_sqft,0)" % building_class.lower()

            # determine current-year vacancy rates
            building_set.compute_variables(
                ("sanfrancisco.building." + occupied_sqft_attribute,
                 total_sqft_attribute),
                resources=compute_resources)

            occupied_sqft_sum = building_set.get_attribute(
                occupied_sqft_attribute).sum()
            total_sqft_sum = float(
                building_set.get_attribute(total_sqft_attribute).sum())
            occupancy_rate = self.safe_divide(occupied_sqft_sum,
                                              total_sqft_sum)
            # cap it at 1.0
            if occupancy_rate > 1.0: occupancy_rate = 1.0
            vacancy_rate = 1.0 - occupancy_rate
            vacant_sqft_sum = vacancy_rate * total_sqft_sum

            should_develop_sqft = (target_vacancy_rate *
                                   total_sqft_sum) - vacant_sqft_sum
            logger.log_status(
                "%s: vacancy rate: %4.3f   occupancy rate: %4.3f" %
                (building_class, vacancy_rate, occupancy_rate))
            logger.log_status(
                "%s: vacant: %d, should be vacant: %f, sum units: %d" %
                (building_class, vacant_sqft_sum,
                 target_vacancy_rate * total_sqft_sum, total_sqft_sum))

            if should_develop_sqft <= 0:
                logger.log_note((
                    "Will not build any %s units, because the current vacancy of %d sqft\n"
                    +
                    "is more than the %d sqft desired for the vacancy rate of %f."
                ) % (building_class, vacant_sqft_sum, target_vacancy_rate *
                     total_sqft_sum, target_vacancy_rate))
                continue

            #create buildings

            # find sample set of qualifying buildings in the events history,
            # e.g. where the building_type is in the correct class, and a positive
            # number of units or sqft (or whatever) were present
            history_sqft = history_table.get_attribute('building_sqft')
            index_sampleset = where((history_sqft > 0) & (
                history_type_classes == building_class_id))[0]

            # Ensure that there are some development projects to choose from.
            logger.log_status("should_develop_sqft=" +
                              str(should_develop_sqft))
            if index_sampleset.shape[0] == 0:
                logger.log_warning(
                    "Cannot create new buildings for building use class %s; no buildings in the event history table from which to sample."
                    % building_class)
                continue

            history_sqft_sampleset = history_sqft[index_sampleset]
            logger.log_status("history_sqft_sampleset = " +
                              str(history_sqft_sampleset))

            mean_size = history_sqft_sampleset.mean()
            idx = array([], dtype="int32")
            #TODO: should the 'int' in the following line be 'ceil'?
            num_of_projects_to_select = max(
                10, int(should_develop_sqft / mean_size))
            while True:
                idx = concatenate((idx,
                                   randint(0,
                                           history_sqft_sampleset.size,
                                           size=num_of_projects_to_select)))
                csum = history_sqft_sampleset[idx].cumsum()
                idx = idx[where(csum <= should_develop_sqft)]
                if csum[-1] >= should_develop_sqft:
                    break

            logger.log_status("idx = " + str(idx))

            nbuildings = idx.size
            if nbuildings == 0: continue

            new_building_id_end = new_building_id_start + nbuildings

            # copy_attributes
            for attribute in new_building_copy_attrs:
                attr_values = history_table.get_attribute(attribute)[
                    index_sampleset[idx]]
                new_buildings[attribute] = concatenate(
                    (new_buildings[attribute], attr_values))

            # calc_attributes
            new_buildings[building_id_name] = concatenate(
                (new_buildings[building_id_name],
                 arange(new_building_id_start, new_building_id_end)))
            new_buildings[location_id_name] = concatenate(
                (new_buildings[location_id_name], zeros(nbuildings)))
            new_buildings["year_built"] = concatenate(
                (new_buildings["year_built"], year * ones(nbuildings)))
            logger.log_status("Creating %s sqft of %s %s buildings." %
                              (history_sqft_sampleset[idx].sum(), nbuildings,
                               building_class))
            new_building_id_start = new_building_id_end  # arange above excludes new_building_id_end, so it is the next free id
            logger.log_status(new_buildings)
        building_set.add_elements(new_buildings, require_all_attributes=False)

        difference = building_set.size() - building_set_size_orig
        index = arange(difference) + building_set_size_orig
        return index
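
The while loop in these transition models is a sample-until-full scheme: draw project
indices from the event history with replacement, keep the prefix whose cumulative size
stays within the target, and stop once the cumulative sum of a draw has reached it. A
standalone sketch of the same loop, assuming only numpy (sizes and target are made up):

from numpy import array, concatenate, where
from numpy.random import randint

sizes = array([100, 250, 400, 50])  # project sizes from a hypothetical event history
target = 600
idx = array([], dtype="int32")
while True:
    idx = concatenate((idx, randint(0, sizes.size, size=10)))
    csum = sizes[idx].cumsum()
    idx = idx[where(csum <= target)]
    if csum[-1] >= target:
        break
# sizes[idx].sum() is now <= target, and the first excluded draw would overshoot it
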
Esempio n. 27
0
class Estimator(GenericModelExplorer):
    def __init__(self, config=None, save_estimation_results=False):
        if 'cache_directory' not in config or config['cache_directory'] is None:
            raise KeyError(
                "The cache directory must be specified in the "
                "given configuration, giving the filesystem path to the cache "
                "directory containing the data with which to estimate. Please "
                "check that your configuration contains the 'cache_directory' "
                "entry and that it is not None.")

        self.simulation_state = SimulationState(new_instance=True)
        self.simulation_state.set_cache_directory(config['cache_directory'])

        SessionConfiguration(
            new_instance=True,
            package_order=config['dataset_pool_configuration'].package_order,
            in_storage=AttributeCache())
        self.config = Resources(config)
        self.save_estimation_results = save_estimation_results
        self.debuglevel = self.config.get("debuglevel", 4)
        self.model_system = ModelSystem()
        self.agents_index_for_prediction = None

        models = self.config.get('models', [])

        self.model_name = None
        if "model_name" in config.keys():
            self.model_name = config["model_name"]
        else:
            for model in models:
                if isinstance(model, dict):
                    model_name = model.keys()[0]
                    if (model[model_name] == "estimate") or (
                            isinstance(model[model_name], list) and
                        ("estimate" in model[model_name])):
                        self.model_name = model_name
                        break
        estimate_config_changes = self.config.get(
            'config_changes_for_estimation', {}).get('estimate_config', {})
        if len(estimate_config_changes) > 0:
            change = Resources({
                'models_configuration': {
                    self.model_name: {
                        'controller': {
                            'init': {
                                'arguments': {}
                            }
                        }
                    }
                }
            })
            estimate_config_str = self.config['models_configuration'].get(
                self.model_name,
                {}).get('controller',
                        {}).get('init',
                                {}).get('arguments',
                                        {}).get('estimate_config', '{}')
            estimate_config = Resources({})
            try:
                estimate_config = eval(estimate_config_str)
            except:
                pass  # if the stored string cannot be evaluated, keep the empty Resources

            estimate_config.merge(estimate_config_changes)
            self.config.merge(change)
            self.config['models_configuration'][
                self.model_name]['controller']['init']['arguments'][
                    'estimate_config'] = 'Resources(%s)' % estimate_config

    def estimate(self, out_storage=None):
        self.model_system.run(self.config,
                              write_datasets_to_cache_at_end_of_year=False)
        self.extract_coefficients_and_specification()

        if self.save_estimation_results:
            self.save_results(out_storage=out_storage)

    def reestimate(self,
                   specification_module_name=None,
                   specification_dict=None,
                   out_storage=None,
                   type=None,
                   submodels=None):
        """specification_module_name is name of a module that contains a dictionary called
        'specification'. If it is not given, the argument specification_dict must be given which is a dictionary object.
        'type' is the name of model member, such as 'commercial', 'residential'. The specification dictionary
        is expected to have an entry of this name. If 'submodels' is given (list or a number),
        the restimation is done only for those submodels.
        """
        if specification_module_name is not None:
            exec("import " + specification_module_name)
            eval("reload (" + specification_module_name + ")")
            exec("specification_dict =" + specification_module_name +
                 ".specification")

        if type is not None:
            specification_dict = specification_dict[type]
        if submodels is not None:  #remove all submodels but the given ones from specification
            submodels_to_be_deleted = specification_dict.keys()
            if not isinstance(submodels, list):
                submodels = [submodels]
            for sm in submodels:
                if sm not in submodels_to_be_deleted:
                    raise ValueError, "Submodel %s not in the specification." % sm
                submodels_to_be_deleted.remove(sm)
                if "_definition_" in submodels_to_be_deleted:
                    submodels_to_be_deleted.remove("_definition_")
            for sm in submodels_to_be_deleted:
                del specification_dict[sm]
        self.specification = EquationSpecification(
            specification_dict=specification_dict)
        new_namespace = self.model_system.run_year_namespace
        keys_coeff_spec = self.get_keys_for_coefficients_and_specification()
        new_namespace[keys_coeff_spec["specification"]] = self.specification
        self.coefficients, coeff_dict_dummy = self.model_system.do_process(
            new_namespace)
        ## update run_year_namespace since it has not been updated by do_process
        self.model_system.run_year_namespace = new_namespace
        self.model_system.run_year_namespace[
            keys_coeff_spec["coefficients"]] = self.coefficients

        ## this gets coeff and spec from run_year_namespace, which is only updated in the _run_year method
        #self.extract_coefficients_and_specification()
        if self.save_estimation_results:
            self.save_results(out_storage=out_storage)
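
    # Usage sketch for reestimate() (hedged; the module path, member name and
    # submodel id below are hypothetical):
    #
    #     estimator.reestimate(specification_module_name='my_package.my_specification',
    #                          type='commercial', submodels=[1])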

    def predict(self, predicted_choice_id_name, agents_index=None):
        """ Run prediction. Currently makes sense only for choice models."""
        # Create temporary configuration where all words 'estimate' are replaced by 'run'
        tmp_config = Resources(self.config)

        if self.agents_index_for_prediction is None:
            self.agents_index_for_prediction = self.get_agent_set_index().copy()

        if agents_index is None:
            agents_index = self.agents_index_for_prediction

        tmp_config['models_configuration'][self.model_name]['controller'][
            'run']['arguments']['coefficients'] = "coeff_est"
        tmp_config['models_configuration'][self.model_name]['controller'][
            'run']['arguments']['agents_index'] = "agents_index"
        tmp_config['models_configuration'][self.model_name]['controller'][
            'run']['arguments']['chunk_specification'] = "{'nchunks':1}"

        ### save specification and coefficients to cache (regardless of the save_estimation_results flag)
        ### so that the prepare_for_run method can load specification and coefficients from there
        #output_configuration = self.config['output_configuration']
        #del self.config['output_configuration']
        #self.save_results()

        #self.config['output_configuration'] = output_configuration

        #self.model_system.run_year_namespace["coefficients"] = self.coefficients
        #del tmp_config['models_configuration'][self.model_name]['controller']['prepare_for_run']

        try:
            run_year_namespace = copy.copy(
                self.model_system.run_year_namespace)
        except:
            logger.log_error("The estimate() method must be run first")
            return False

        try:
            agents = self.get_agent_set()
            choice_id_name = self.get_choice_set().get_id_name()[0]
            # save current locations of agents
            current_choices = agents.get_attribute(choice_id_name).copy()
            dummy_data = zeros(current_choices.size,
                               dtype=current_choices.dtype) - 1
            agents.modify_attribute(name=choice_id_name,
                                    data=dummy_data)  #reset all choices

            run_year_namespace["process"] = "run"
            run_year_namespace["coeff_est"] = self.coefficients
            run_year_namespace["agents_index"] = agents_index
            run_year_namespace["processmodel_config"] = tmp_config[
                'models_configuration'][self.model_name]['controller']['run']
            new_choices = self.model_system.do_process(run_year_namespace)

            #self.model_system.run(tmp_config, write_datasets_to_cache_at_end_of_year=False)
            #new_choices = agents.get_attribute(choice_id_name).copy()
            agents.modify_attribute(name=choice_id_name, data=current_choices)
            dummy_data[agents_index] = new_choices
            if predicted_choice_id_name not in agents.get_known_attribute_names():
                agents.add_primary_attribute(name=predicted_choice_id_name,
                                             data=dummy_data)
            else:
                agents.modify_attribute(name=predicted_choice_id_name,
                                        data=dummy_data)
            logger.log_status("Predictions saved into attribute " +
                              predicted_choice_id_name)
            return True
        except Exception, e:
            logger.log_error("Error encountered in prediction: %s" % e)
            logger.log_stack_trace()

        return False
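
predict() temporarily blanks the choice attribute, runs the model for agents_index, and
restores the original choices while writing the predictions into a separate attribute.
The array bookkeeping alone, with made-up values:

from numpy import array, zeros

current_choices = array([3, 7, 5, 2])
agents_index = array([1, 2])
dummy_data = zeros(current_choices.size, dtype=current_choices.dtype) - 1
new_choices = array([8, 4])             # what the model returned for agents_index
dummy_data[agents_index] = new_choices  # -> [-1, 8, 4, -1]; -1 marks agents not predicted
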
Esempio n. 28
0
    def run(
            self,
            building_set,
            #             building_use_table,
            building_use_classification_table,
            vacancy_table,
            history_table,
            year,
            location_set,
            resources=None):
        building_classes = building_use_classification_table.get_attribute(
            "name")
        unit_attributes = building_use_classification_table.get_attribute(
            'units')
        building_id_name = building_set.get_id_name()[0]
        location_id_name = location_set.get_id_name()[0]
        new_buildings = {
            building_id_name: array([], dtype='int32'),
            "building_use_id": array([], dtype=int8),
            "year_built": array([], dtype='int32'),
            #                         "building_sqft": array([], dtype='int32'),
            #                         "residential_units": array([], dtype='int32'),
            "unit_price": array([], dtype=float32),
            location_id_name: array([], dtype='int32')
        }
        for attribute in unit_attributes:
            new_buildings[attribute] = array([], dtype='int32')

        max_id = building_set.get_id_attribute().max()
        building_set_size_orig = building_set.size()

        for itype in range(building_use_classification_table.size()):  # iterate over building types
            building_class = building_classes[itype]
            #            type_code = building_types_table.get_id_attribute()[itype]
            vacancy_attribute = 'target_total_%s_vacancy' % building_class
            if vacancy_attribute not in vacancy_table.get_known_attribute_names():
                logger.log_warning(
                    "No target vacancy for building class '%s'. Transition model for this building class skipped."
                    % building_class)
                continue
            vacancy_table.get_attribute(vacancy_attribute)  # ensures that the attribute is loaded
            target_vacancy_rate = getattr(vacancy_table.get_data_element_by_id(year), vacancy_attribute)

            compute_resources = Resources(resources)
            compute_resources.merge({"debug": self.debug})
            units_attribute = unit_attributes[itype]
            vacant_units_attribute = 'vacant_' + units_attribute

            # determine current-year vacancy rates
            building_set.compute_variables("urbansim_parcel.building." +
                                           vacant_units_attribute,
                                           resources=compute_resources)

            vacant_units_sum = building_set.get_attribute(
                vacant_units_attribute).sum()
            units_sum = float(
                building_set.get_attribute(units_attribute).sum())
            vacant_rate = self.safe_divide(vacant_units_sum, units_sum)

            should_develop_units = max(
                0, (target_vacancy_rate * units_sum - vacant_units_sum) /
                (1 - target_vacancy_rate))
            logger.log_status(
                building_class +
                ": vacant units: %d, should be vacant: %f, sum units: %d" %
                (vacant_units_sum, target_vacancy_rate * units_sum, units_sum))

            if not should_develop_units:
                logger.log_note((
                    "Will not build any " + building_class +
                    " units, because the current vacancy of %d units\n" +
                    "is more than the %d units desired for the vacancy rate of %f."
                ) % (vacant_units_sum, target_vacancy_rate * units_sum,
                     target_vacancy_rate))
                continue

            # average_buildings_value = None
            # if (type+"_improvement_value") in location_set.get_known_attribute_names():
            #     average_buildings_value = self.safe_divide(
            #         location_set.get_attribute(type+"_improvement_value").sum(), units_sum)

            # create buildings

            history_values = history_table.get_attribute(units_attribute)
            index_non_zeros_values = where(history_values > 0)[0]
            history_values_without_zeros = history_values[
                index_non_zeros_values]
            history_type = history_table.get_attribute("building_use_id")
            history_type_without_zeros = history_type[index_non_zeros_values]
            history_price = history_table.get_attribute("unit_price")
            history_price_without_zeros = history_price[index_non_zeros_values]

            #TODO: what happens if history has only zeroes?
            mean_size = history_values_without_zeros.mean()
            idx = array([], dtype="int32")
            # Ensure that there are some development projects to choose from.
            #TODO: should the 'int' in the following line be 'ceil'?
            num_of_projects_to_select = max(
                10, int(should_develop_units / mean_size))
            while True:
                idx = concatenate((idx,
                                   randint(0,
                                           history_values_without_zeros.size,
                                           size=num_of_projects_to_select)))
                csum = history_values_without_zeros[idx].cumsum()
                idx = idx[where(csum <= should_develop_units)]
                if csum[-1] >= should_develop_units:
                    break

            nbuildings = idx.size

            for attribute in unit_attributes:

                # if attribute == units_attribute:
                #     new_unit_values = history_values_without_zeros[idx]
                # else:
                #     new_unit_values = zeros(nbuildings)
                # to accommodate mixed-use buildings, allow attributes other than units_attribute to be non-zero
                new_unit_values = history_table.get_attribute(attribute)[
                    index_non_zeros_values[idx]]

                new_buildings[attribute] = concatenate(
                    (new_buildings[attribute], new_unit_values))

            new_max_id = max_id + nbuildings
            new_buildings[building_id_name] = concatenate(
                (new_buildings[building_id_name],
                 arange(max_id + 1, new_max_id + 1)))
            new_buildings["building_use_id"] = concatenate(
                (new_buildings["building_use_id"],
                 history_type_without_zeros[idx]))
            new_buildings["year_built"] = concatenate(
                (new_buildings["year_built"],
                 year * ones(nbuildings, dtype="int32")))
            new_buildings["unit_price"] = concatenate(
                (new_buildings["unit_price"],
                 history_price_without_zeros[idx]))
            new_buildings[location_id_name] = concatenate(
                (new_buildings[location_id_name],
                 zeros(nbuildings, dtype="int32")))
            logger.log_status("Creating %s %s of %s %s buildings." %
                              (history_values_without_zeros[idx].sum(),
                               units_attribute, nbuildings, building_class))

        building_set.add_elements(new_buildings, require_all_attributes=False)

        difference = building_set.size() - building_set_size_orig
        index = arange(difference) + building_set_size_orig
        return index

    def get_resources(self, data_dictionary, dataset):
        """Create resources for computing a variable. """
        resources=Resources()
        
        for key in data_dictionary.keys():
            if key in self.datasets:
                data = data_dictionary[key]
                
                storage = StorageFactory().get_storage('dict_storage')
                
                if self.id_names[key] not in data_dictionary[key].keys() and not isinstance(self.id_names[key], list):
                    data[self.id_names[key]] = arange(1, len(data_dictionary[key][data_dictionary[key].keys()[0]])+1) # add id array
                
                id_name = self.id_names[key]
                storage.write_table(table_name = 'data', table_data = data)
                
                if key == "gridcell":
                    gc = GridcellDataset(in_storage=storage, in_table_name='data')
                    
                    # add relative_x and relative_y
                    gc.get_id_attribute()
                    n = int(ceil(sqrt(gc.size())))
                    if "relative_x" not in data.keys():
                        x = (indices((n,n))+1)[1].ravel()
                        gc.add_attribute(x[0:gc.size()], "relative_x", metadata=1)
                    if "relative_y" not in data.keys():
                        y = (indices((n,n))+1)[0].ravel()
                        gc.add_attribute(y[0:gc.size()], "relative_y", metadata=1)
                    resources.merge({key: gc})
                
                elif key == "household":
                    resources.merge({key: HouseholdDataset(in_storage=storage, in_table_name='data')})
                elif key == "development_project":
                    resources.merge({key: DevelopmentProjectDataset(in_storage=storage, in_table_name='data')})
                elif key == "development_event":
                    resources.merge({key: DevelopmentEventDataset(in_storage=storage, in_table_name='data')})   
                elif key == "neighborhood":
                    resources.merge({key: NeighborhoodDataset(in_storage=storage, in_table_name='data')})
                elif key == "job":
                    resources.merge({key: JobDataset(in_storage=storage, in_table_name='data')})                    
                elif key == "zone":
                    resources.merge({key: ZoneDataset(in_storage=storage, in_table_name='data')})
                elif key == "travel_data":
                    resources.merge({key: TravelDataDataset(in_storage=storage, in_table_name='data')})
                elif key == "faz":
                    resources.merge({key: FazDataset(in_storage=storage, in_table_name='data')})
                elif key == "fazdistrict":
                    resources.merge({key: FazdistrictDataset(in_storage=storage, in_table_name='data')})                    
                elif key == "race":
                    resources.merge({key: RaceDataset(in_storage=storage, in_table_name='data')})
                elif key == "county":
                    resources.merge({key: CountyDataset(in_storage=storage, in_table_name='data')})
                elif key == "large_area":
                    resources.merge({key: LargeAreaDataset(in_storage=storage, in_table_name='data')})
                elif key == "development_group":
                    resources.merge({key: DevelopmentGroupDataset(in_storage=storage, in_table_name='data')})
                elif key == "employment_sector_group":
                    resources.merge({key: EmploymentSectorGroupDataset(in_storage=storage, in_table_name='data')})        
                elif key == "plan_type_group":
                    resources.merge({key: PlanTypeGroupDataset(in_storage=storage, in_table_name='data')})
                elif key == "building":
                    resources.merge({key: BuildingDataset(in_storage=storage, in_table_name='data')})
                    
            else:
                resources.merge({key:data_dictionary[key]})

        if dataset in self.interactions:
            if dataset == "household_x_gridcell": 
                resources.merge({"dataset": HouseholdXGridcellDataset(dataset1=resources["household"], dataset2=resources["gridcell"])})
            if dataset == "job_x_gridcell":
                resources.merge({"dataset": JobXGridcellDataset(dataset1=resources["job"], dataset2=resources["gridcell"])})
            if dataset == "household_x_zone":
                resources.merge({"dataset": HouseholdXZoneDataset(dataset1=resources["household"], dataset2=resources["zone"])})
            if dataset == "household_x_neighborhood":
                resources.merge({"dataset": HouseholdXNeighborhoodDataset(dataset1=resources["household"], dataset2=resources["neighborhood"])})
            if dataset == "development_project_x_gridcell":
                resources.merge({"dataset": DevelopmentProjectXGridcellDataset(dataset1=resources["development_project"], dataset2=resources["gridcell"])})

        else:
            resources.merge({"dataset": resources[dataset]})
        resources.merge({"check_variables":'*', "debug":4})
        return resources
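
The relative_x/relative_y attributes above lay the gridcells out on the smallest n-by-n
square that holds them; numpy.indices supplies the column and row coordinates. For
example, with pure numpy:

from numpy import indices

n = 2
x = (indices((n, n)) + 1)[1].ravel()  # column per cell -> [1, 2, 1, 2]
y = (indices((n, n)) + 1)[0].ravel()  # row per cell    -> [1, 1, 2, 2]
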
Esempio n. 30
0
    def get_resources(self, data_dictionary, dataset):
        """Create resources for computing a variable. """
        resources = Resources()
        for key in data_dictionary.keys():
            if key in self.datasets:
                data = data_dictionary[key]
                if self.id_names[key] not in data_dictionary[key].keys() and not isinstance(self.id_names[key], list):
                    data[self.id_names[key]] = arange(1, len(data_dictionary[key][data_dictionary[key].keys()[0]]) + 1)  # add id array

                if key == "land_cover":
                    land_cover_storage = StorageFactory().get_storage(
                        'dict_storage')
                    land_cover_table_name = 'land_cover'
                    land_cover_storage.write_table(
                        table_name=land_cover_table_name,
                        table_data=data,
                    )

                    lc = LandCoverDataset(
                        in_storage=land_cover_storage,
                        in_table_name=land_cover_table_name,
                    )

                    # add relative_x and relative_y
                    lc.get_id_attribute()
                    n = int(ceil(sqrt(lc.size())))

                    if "relative_x" not in data.keys():
                        x = (indices((n, n)) + 1)[1].ravel()
                        lc.add_attribute(x[0:lc.size()],
                                         "relative_x",
                                         metadata=1)
                    if "relative_y" not in data.keys():
                        y = (indices((n, n)) + 1)[0].ravel()
                        lc.add_attribute(y[0:lc.size()],
                                         "relative_y",
                                         metadata=1)

                    resources.merge({key: lc})

                if key == "gridcell":
                    gridcell_storage = StorageFactory().get_storage(
                        'dict_storage')
                    gridcell_table_name = 'gridcell'
                    gridcell_storage.write_table(
                        table_name=gridcell_table_name,
                        table_data=data,
                    )

                    gridcell_dataset = GridcellDataset(
                        in_storage=gridcell_storage,
                        in_table_name=gridcell_table_name,
                    )

                    resources.merge({key: gridcell_dataset})
            else:
                resources.merge({key: data_dictionary[key]})

        if dataset in self.interactions:
            pass
        else:
            resources.merge({"dataset": resources[dataset]})
        resources.merge({"check_variables": '*', "debug": 4})
        return resources
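
The dict_storage round trip used for the land_cover and gridcell tables is the generic
way to wrap in-memory arrays as an opus Dataset; a minimal standalone sketch (assuming
the opus_core import paths used throughout these examples, with made-up table contents):

from numpy import arange, array
from opus_core.storage_factory import StorageFactory
from opus_core.datasets.dataset import Dataset

storage = StorageFactory().get_storage('dict_storage')
storage.write_table(table_name='cells',
                    table_data={'cell_id': arange(3) + 1,
                                'slope': array([0.5, 1.5, 2.5])})
cells = Dataset(in_storage=storage, id_name='cell_id', in_table_name='cells')
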
Esempio n. 31
0
    def estimate(self,
                 specification,
                 dataset,
                 outcome_attribute,
                 index=None,
                 procedure=None,
                 data_objects=None,
                 estimate_config=None,
                 debuglevel=0):
        """'specification' is of type EquationSpecification,
            'dataset' is of type Dataset,
            'outcome_attribute' - string that determines the dependent variable,
            'index' are indices of individuals in dataset for which
                    the model runs. If it is None, the whole dataset is considered.
            'procedure' - name of the estimation procedure. If it is None,
                there should be an entry "estimation" in 'estimate_config' that determines the procedure. The class
                must have a method 'run' that takes as arguments 'data', 'regression_procedure' and 'resources'.
                It returns a dictionary with entries 'estimators', 'standard_errors' and 't_values' (all 1D numpy arrays).
            'data_objects' is a dictionary where each key is the name of a data object
                    ('zone', ...) and its value is an object of class Dataset.
            'estimate_config' is of type Resources, it gives additional arguments for the estimation procedure.
            'debuglevel' overwrites the class 'debuglevel'.
        """
        #import wingdbstub
        self.debug.flag = debuglevel
        if estimate_config is None:
            estimate_config = Resources()
        if not isinstance(estimate_config, Resources) and isinstance(
                estimate_config, dict):
            estimate_config = Resources(estimate_config)
        self.estimate_config = estimate_config.merge_with_defaults(
            self.estimate_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.procedure = procedure
        if self.procedure is None:
            self.procedure = self.estimate_config.get("estimation", None)
        if self.procedure is not None:
            self.procedure = ModelComponentCreator().get_model_component(
                self.procedure)
        else:
            logger.log_warning(
                "No estimation procedure given, or problems with loading the corresponding module."
            )

        compute_resources = Resources({"debug": self.debug})
        if dataset.size() <= 0:  # no data loaded yet
            dataset.get_id_attribute()
        if index is None:
            index = arange(dataset.size())
        if not isinstance(index, ndarray):
            index = array(index)

        estimation_size_agents = self.estimate_config.get(
            "estimation_size_agents",
            None)  # should be a proportion of the agent_set
        if estimation_size_agents is None:
            estimation_size_agents = 1.0
        else:
            estimation_size_agents = max(min(estimation_size_agents, 1.0),
                                         0.0)  # between 0 and 1

        if estimation_size_agents < 1.0:
            self.debug.print_debug("Sampling agents for estimation ...", 3)
            estimation_idx = sample_noreplace(
                arange(index.size), int(index.size * estimation_size_agents))
        else:
            estimation_idx = arange(index.size)

        estimation_idx = index[estimation_idx]
        self.debug.print_debug(
            "Number of observations for estimation: " +
            str(estimation_idx.size), 2)
        if estimation_idx.size <= 0:
            self.debug.print_debug("Nothing to be done.", 2)
            return (None, None)

        coefficients = create_coefficient_from_specification(specification)
        specified_coefficients = SpecifiedCoefficients().create(coefficients,
                                                                specification,
                                                                neqs=1)
        submodels = specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(
            submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(
            submodels,
            self.submodel_string,
            dataset,
            estimation_idx,
            dataset_pool=self.dataset_pool,
            resources=compute_resources,
            submodel_size_max=self.estimate_config.get('submodel_size_max',
                                                       None))
        variables = specified_coefficients.get_full_variable_names_without_constants()
        self.debug.print_debug("Compute variables ...", 4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables,
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)

        coef = {}
        estimated_coef = {}
        self.outcome = {}
        dataset.compute_variables([outcome_attribute],
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)
        regression_resources = Resources(estimate_config)
        regression_resources.merge({"debug": self.debug})
        outcome_variable_name = VariableName(outcome_attribute)
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(
                specified_coefficients, submodel)
            self.increment_current_status_piece()
            logger.log_status("Estimate regression for submodel " +
                              str(submodel),
                              tags=["estimate"],
                              verbosity_level=2)
            logger.log_status("Number of observations: " +
                              str(self.observations_mapping[submodel].size),
                              tags=["estimate"],
                              verbosity_level=2)
            self.data[submodel] = dataset.create_regression_data_for_estimation(
                coef[submodel],
                index=estimation_idx[self.observations_mapping[submodel]])
            self.coefficient_names[submodel] = coef[submodel].get_coefficient_names_without_constant()[0, :]
            if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0) \
                    and (self.procedure is not None):  # observations for this submodel available
                self.outcome[submodel] = dataset.get_attribute_by_index(
                    outcome_variable_name.get_alias(),
                    estimation_idx[self.observations_mapping[submodel]])
                regression_resources.merge({"outcome": self.outcome[submodel]})
                regression_resources.merge({
                    "coefficient_names":
                    self.coefficient_names[submodel].tolist(),
                    "constant_position":
                    coef[submodel].get_constants_positions()
                })
                estimated_coef[submodel] = self.procedure.run(
                    self.data[submodel],
                    self.regression,
                    resources=regression_resources)
                if "estimators" in estimated_coef[submodel].keys():
                    coef[submodel].set_coefficient_values(
                        estimated_coef[submodel]["estimators"])
                if "standard_errors" in estimated_coef[submodel].keys():
                    coef[submodel].set_standard_errors(
                        estimated_coef[submodel]["standard_errors"])
                if "other_measures" in estimated_coef[submodel].keys():
                    for measure in estimated_coef[submodel]["other_measures"].keys():
                        coef[submodel].set_measure(
                            measure,
                            estimated_coef[submodel]["other_measures"][measure])
                if "other_info" in estimated_coef[submodel].keys():
                    for info in estimated_coef[submodel]["other_info"]:
                        coef[submodel].set_other_info(
                            info, estimated_coef[submodel]["other_info"][info])
        coefficients.fill_coefficients(coef)

        self.save_predicted_values_and_errors(specification,
                                              coefficients,
                                              dataset,
                                              outcome_variable_name,
                                              index=index,
                                              data_objects=data_objects)

        return (coefficients, estimated_coef)
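
The docstring above pins down the contract for a custom estimation procedure: a class
whose run(data, regression_procedure, resources) returns a dictionary with 'estimators',
'standard_errors' and 't_values'. A minimal least-squares-flavored sketch of that shape
(the math here is illustrative, not the library's own implementation):

from numpy import diag, dot, sqrt
from numpy.linalg import inv, lstsq

class MinimalOLSProcedure(object):
    """Illustrative procedure honoring the documented run() contract."""
    def run(self, data, regression_procedure=None, resources=None):
        outcome = resources["outcome"]  # 1D outcome array, merged in above
        estimators = lstsq(data, outcome)[0]
        residuals = outcome - dot(data, estimators)
        dof = max(data.shape[0] - data.shape[1], 1)
        sigma2 = (residuals ** 2).sum() / dof
        standard_errors = sqrt(diag(sigma2 * inv(dot(data.T, data))))
        return {"estimators": estimators,
                "standard_errors": standard_errors,
                "t_values": estimators / standard_errors}
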
Esempio n. 32
0
    def load_dataset(self, resources=None, attributes=None, in_storage=None,
                     in_table_name=None, lowercase=None, **kwargs):

        #set defaults
        attributes_default = '*'
        lower_default = 1 # if 1, use lowercase for attribute names

        # merge arguments with dictionaries and add missing entries
        local_resources = Resources(self.resources)
        if resources is not None:
            local_resources.merge_if_not_None(resources)
        local_resources.merge_if_not_None({"attributes":attributes,
                                           "in_storage":in_storage,
                                           "in_table_name":in_table_name,
                                           "lowercase":lowercase})
        local_resources.merge_with_defaults({"attributes":attributes_default,
                                             "lowercase":lower_default,
                                            })

        # check obligatory entries
        local_resources.check_obligatory_keys(["in_storage", "in_table_name"])

        # prepare for loading
        in_storage = local_resources["in_storage"]

        if not self._is_hidden_id():
            local_resources.merge({"id_name":self._id_names})
            
        table_name = local_resources['in_table_name']
        column_names = local_resources['attributes']
        chunked_attributes = self.chunk_columns(storage=in_storage,
                                                   table_name=table_name, 
                                                   column_names=column_names,
                                                   nchunks=1)
        # flatten list
        column_names = [name for name in chunked_attributes[0]
                                if name in in_storage.get_column_names(table_name)]
        data = in_storage.load_table(table_name = table_name, 
                                             column_names = column_names)
        self.df = pd.DataFrame(data)
        self.df.set_index(self._id_names, inplace=True)
        data_computed = {}
        if table_name+".computed" in in_storage.get_table_names():
            column_names_computed = [name for name in column_names
                                if name in in_storage.get_column_names(table_name+".computed")]
            data_computed = in_storage.load_table(table_name = table_name+".computed", 
                                                 column_names = column_names_computed)
            dfcomp = pd.DataFrame(data_computed)
            dfcomp.set_index(self._id_names, inplace=True)
            self.df = pd.concat([self.df, dfcomp], axis=1)
                      
        for attr in data:
            if not ((attr in self._id_names) and self.attribute_boxes.has_key(attr)): #do not store id_name every time
                self.attribute_boxes[attr] = AttributeBox(self, [],
                                                variable_name=self.create_and_check_qualified_variable_name(attr),
                                                type=AttributeType.PRIMARY,
                                                is_in_memory=True,
                                                header=None,
                                                version=0)

        for attr in data_computed:
            if not ((attr in self._id_names) and self.attribute_boxes.has_key(attr)): #do not store id_name every time
                self.attribute_boxes[attr] = AttributeBox(self, [],
                                                variable_name=self.create_and_check_qualified_variable_name(attr),
                                                type=AttributeType.COMPUTED,
                                                is_in_memory=True,
                                                header=None,
                                                version=0)
                                                                        
        self.n = self.df.shape[0]
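
A note on the computed-attributes merge above: pd.concat takes a list of frames, and
axis=1 joins columns on the shared index, which is why both frames are indexed by the
id names first. A small illustration:

import pandas as pd

df = pd.DataFrame({'id': [1, 2], 'a': [10, 20]}).set_index('id')
dfcomp = pd.DataFrame({'id': [1, 2], 'b': [0.1, 0.2]}).set_index('id')
merged = pd.concat([df, dfcomp], axis=1)  # columns a and b, aligned on id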