def __init__(self,
            resources=None,
            in_storage=None,
            out_storage=None,
            in_table_name=None,
            out_table_name=None,
            attributes=None,
            id_name=None,
            nchunks=None,
            other_in_table_names=None,
            debuglevel=0
            ):
        debug = DebugPrinter(debuglevel)
        debug.print_debug("Creating EmploymentSectorGroupDataset object.",2)
        resources = ResourceCreatorEmploymentSectorGroups().get_resources_for_dataset(
            resources = resources,
            in_storage = in_storage,
            out_storage = out_storage,
            in_table_name = in_table_name,
            out_table_name = out_table_name,
            attributes = attributes,
            id_name = id_name,
            id_name_default = self.id_name_default,
            nchunks = nchunks,
            debug = debug,
            )

        Dataset.__init__(self,resources = resources)

        if isinstance(other_in_table_names,list):
            for place_name in other_in_table_names: #load other tables
                ds = Dataset(resources = resources)
                ds.load_dataset(in_table_name=place_name)
                self.connect_datasets(ds)
Example #2
    def __init__(self,
            resources=None,
            in_storage=None,
            out_storage=None,
            in_table_name=None,
            out_table_name=None,
            attributes=None,
            id_name=None,
            nchunks=None,
            other_in_table_names=[],
            debuglevel=0
            ):
        debug = DebugPrinter(debuglevel)
        debug.print_debug("Creating PlanTypeDataset object.",2)
        resources = self._get_resources_for_dataset(
            in_table_name_default="plan_types",
            in_table_name_groups_default="plan_type_group_definitions",
            out_table_name_default="plan_types",
            dataset_name="plan_type",
            resources = resources,
            in_storage = in_storage,
            out_storage = out_storage,
            in_table_name = in_table_name,
            out_table_name = out_table_name,
            attributes = attributes,
            id_name = id_name,
            id_name_default = self.id_name_default,
            debug = debug,
            )

        AbstractGroupDataset.__init__(self,
            resources=resources,
            other_in_table_names=other_in_table_names, 
            use_groups=True
            )
Example #3
    def __init__(self,
                 resources=None,
                 in_storage=None,
                 out_storage=None,
                 in_table_name=None,
                 out_table_name=None,
                 attributes=None,
                 id_name=None,
                 nchunks=None,
                 other_in_table_names=[],
                 debuglevel=0):
        debug = DebugPrinter(debuglevel)
        debug.print_debug("Creating EmploymentSectorDataset object.", 2)
        resources = self._get_resources_for_dataset(
            in_table_name_default="employment_sectors",
            in_table_name_groups_default=
            "employment_adhoc_sector_group_definitions",
            out_table_name_default="employment_sectors",
            dataset_name="employment_sector",
            resources=resources,
            in_storage=in_storage,
            out_storage=out_storage,
            in_table_name=in_table_name,
            out_table_name=out_table_name,
            attributes=attributes,
            id_name=id_name,
            id_name_default=self.id_name_default,
            debug=debug,
        )

        AbstractGroupDataset.__init__(
            self,
            resources=resources,
            other_in_table_names=other_in_table_names,
            use_groups=True)
    def __init__(self, resources=None, in_storage=None, out_storage=None,
                  in_table_name=None, attributes=None,
                  out_table_name=None, id_name=None,
                  nchunks=None, other_in_table_names=None,
                  debuglevel=0):
        debug = DebugPrinter(debuglevel)
        debug.print_debug("Creating DevelopmentGroupDataset object.",2)
        resources = ResourceCreatorDevelopmentGroups().get_resources_for_dataset(
            resources = resources,
            in_storage = in_storage,
            out_storage = out_storage,
            in_table_name = in_table_name,
            out_table_name = out_table_name,
            attributes = attributes,
            id_name = id_name,
            id_name_default = self.id_name_default,
            nchunks = nchunks,
            debug = debug
            )

        Dataset.__init__(self,resources = resources)

        if isinstance(other_in_table_names,list):
            for place_name in other_in_table_names: #load other tables
                ds = Dataset(resources = resources)
                ds.load_dataset(in_table_name=place_name)
                self.connect_datasets(ds)
 def __init__(self,
              location_id_name="grid_id",
              dataset_pool=None,
              debuglevel=0):
     self.debug = DebugPrinter(debuglevel)
     self.location_id_name = location_id_name
     self.dataset_pool = self.create_dataset_pool(dataset_pool,
                                                  ["urbansim", "opus_core"])
Example #6
 def __init__(self):
     self.dependencies_list = None
     self.dataset = None
     self.number_of_compute_runs = 0
     try:
         self.debug = SessionConfiguration().get('debuglevel', 0)
     except:
         self.debug = 0
     if isinstance(self.debug, int):
         self.debug = DebugPrinter(self.debug)
Example #7
 def __init__(self):
     self.dependencies_list = None
     self.dataset = None
     self.number_of_compute_runs = 0
     try:
         self.debug = SessionConfiguration().get('debuglevel', 0)
     except:
         self.debug = 0
     if isinstance(self.debug, int):
         self.debug = DebugPrinter(self.debug)
Example #8
    def __init__(self,
                 categories=array([
                     1,
                 ]),
                 resources=None,
                 what=None,
                 attribute_name=None,
                 data=None,
                 names=None,
                 in_storage=None,
                 out_storage=None,
                 in_table_name=None,
                 attributes=None,
                 out_table_name=None,
                 id_name=None,
                 nchunks=None,
                 debuglevel=0):
        """
        'what' must be a string, such as 'residential' or 'commercial'.
        """
        debug = DebugPrinter(debuglevel)
        debug.print_debug(
            "Creating DevelopmentProjectDataset object for %s projects." %
            what, 2)

        self.categories = categories
        self.what = what
        self.attribute_name = attribute_name
        attributes_default = AttributeType.PRIMARY
        dataset_name = "development_project"
        nchunks_default = 1

        if data is not None:
            in_storage = StorageFactory().get_storage('dict_storage')

            in_storage.write_table(table_name='development_projects',
                                   table_data=data)
            in_table_name = 'development_projects'

        resources = ResourceFactory().get_resources_for_dataset(
            dataset_name,
            resources=resources,
            in_storage=in_storage,
            out_storage=out_storage,
            in_table_name_pair=(in_table_name, None),
            out_table_name_pair=(out_table_name, None),
            attributes_pair=(attributes, attributes_default),
            id_name_pair=(id_name, self.id_name_default),
            nchunks_pair=(nchunks, nchunks_default),
            debug_pair=(debug, None))

        self.category_variable_name = resources.get(
            "category_variable_name", self.category_variable_name_default)
        Dataset.__init__(self, resources=resources)
 def __init__(self, resources=None, dataset1=None, dataset2=None, index1 = None, index2 = None, 
             debuglevel=0):
     debug = DebugPrinter(debuglevel)
     debug.print_debug("Creating object %s.%s" % (self.__class__.__module__, self.__class__.__name__), 2)
     
     local_resources = Resources(resources)
     local_resources.merge_if_not_None({"dataset1":dataset1, 
         "dataset2":dataset2, "debug":debug, 
         "index1":index1, "index2":index2})
     CoreInteractionDataset.__init__(self, resources = local_resources)
     
     
    def __init__(
        self,
        categories=array([1]),
        resources=None,
        what=None,
        attribute_name=None,
        data=None,
        names=None,
        in_storage=None,
        out_storage=None,
        in_table_name=None,
        attributes=None,
        out_table_name=None,
        id_name=None,
        nchunks=None,
        debuglevel=0,
    ):
        """
        'what' must be a string, such as 'residential' or 'commercial'.
        """
        debug = DebugPrinter(debuglevel)
        debug.print_debug("Creating DevelopmentProjectDataset object for %s projects." % what, 2)

        self.categories = categories
        self.what = what
        self.attribute_name = attribute_name
        attributes_default = AttributeType.PRIMARY
        dataset_name = "development_project"
        nchunks_default = 1

        if data is not None:
            in_storage = StorageFactory().get_storage("dict_storage")

            in_storage.write_table(table_name="development_projects", table_data=data)
            in_table_name = "development_projects"

        resources = ResourceFactory().get_resources_for_dataset(
            dataset_name,
            resources=resources,
            in_storage=in_storage,
            out_storage=out_storage,
            in_table_name_pair=(in_table_name, None),
            out_table_name_pair=(out_table_name, None),
            attributes_pair=(attributes, attributes_default),
            id_name_pair=(id_name, self.id_name_default),
            nchunks_pair=(nchunks, nchunks_default),
            debug_pair=(debug, None),
        )

        self.category_variable_name = resources.get("category_variable_name", self.category_variable_name_default)
        Dataset.__init__(self, resources=resources)
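
The 'data' argument above sidesteps external storage: the constructor writes the dict into an in-memory 'dict_storage' table named 'development_projects' and reads it back. A minimal construction sketch, assuming the class is importable from its usual UrbanSim location; the column names are illustrative only, and the real id column comes from the subclass's id_name_default:

from numpy import array
# from urbansim.datasets.development_project_dataset import DevelopmentProjectDataset  # assumed import path

projects = DevelopmentProjectDataset(
    what="residential",                       # 'what' must be a string such as 'residential' or 'commercial'
    attribute_name="residential_units",
    data={"project_id": array([1, 2, 3]),     # illustrative id column
          "grid_id": array([101, 102, 103]),
          "residential_units": array([5, 12, 7])},
    debuglevel=0)
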
 def __init__(self,
              location_id_name=None,
              variable_package=None,
              dataset_pool=None,
              debuglevel=0):
     self.debug = DebugPrinter(debuglevel)
     self.location_id_name = self.location_id_name_default
     self.variable_package = self.variable_package_default
     if location_id_name is not None:
         self.location_id_name = location_id_name
     if variable_package is not None:
         self.variable_package = variable_package
     self.dataset_pool = self.create_dataset_pool(dataset_pool,
                                                  ["urbansim", "opus_core"])
    def run(self, projects, types, units, year=0, location_id_name="grid_id", debuglevel=0):
        debug = DebugPrinter(debuglevel)
        grid_ids_for_any_project = array([], dtype=int32)
        grid_ids_by_project_type = {}
        for project_type in types:
            grid_ids_by_project_type[project_type] = array([], dtype=int32)
            if projects[project_type] is not None:
                grid_ids_by_project_type[project_type] = projects[project_type].get_attribute(location_id_name)
            grid_ids_for_any_project = unique(concatenate((grid_ids_for_any_project, 
                                                                  grid_ids_by_project_type[project_type])))
        grid_ids_for_any_project = grid_ids_for_any_project[where(grid_ids_for_any_project>0)]
        if not len(grid_ids_for_any_project): return
        
        result_data = {location_id_name: grid_ids_for_any_project, 
                       "scheduled_year":(year*ones((grid_ids_for_any_project.size,))).astype(int32)}
        for unit in units:
            result_data[unit] = zeros((grid_ids_for_any_project.size,), dtype=int32)
        for project_type in types:
            result_data["%s_improvement_value" % project_type] = zeros((grid_ids_for_any_project.size,), dtype=int32)
            
        grid_idx=0
        for grid_id in grid_ids_for_any_project:
            for i in range(0,len(types)):
                project_type = types[i]
                my_projects = projects[project_type]
                w = where(my_projects.get_attribute(location_id_name) == grid_id)[0]
                if w.size>0:
                    unit_variable = units[i]
                    result_data[unit_variable][grid_idx] = \
                        my_projects.get_attribute_by_index( 
                            my_projects.get_attribute_name(), w).sum()
                    result_data["%s_improvement_value" % project_type][grid_idx] = \
                        my_projects.get_attribute_by_index( 
                            "improvement_value", w).sum()
            grid_idx += 1  
        
        storage = StorageFactory().get_storage('dict_storage')

        eventset_table_name = 'development_events_generated'        
        storage.write_table(table_name=eventset_table_name, table_data=result_data)

        eventset = DevelopmentEventDataset(
            in_storage = storage, 
            in_table_name = eventset_table_name, 
            id_name = [location_id_name, "scheduled_year"],
            ) 
                                      
        debug.print_debug("Number of events: " + str(grid_ids_for_any_project.size), 3)
        return eventset
    def __init__(self, regression_procedure="opus_core.linear_regression",
                  submodel_string=None,
                  run_config=None, estimate_config=None, debuglevel=0, dataset_pool=None):
 
        self.debug = DebugPrinter(debuglevel)

        self.dataset_pool = self.create_dataset_pool(dataset_pool)

        self.regression = RegressionModelFactory().get_model(name=regression_procedure)
        if self.regression == None:
            raise StandardError("No regression procedure given.")

        self.submodel_string = submodel_string

        self.run_config = run_config
        if self.run_config == None:
            self.run_config = Resources()
        if not isinstance(self.run_config,Resources) and isinstance(self.run_config, dict):
            self.run_config = Resources(self.run_config)

        self.estimate_config = estimate_config
        if self.estimate_config == None:
            self.estimate_config = Resources()
        if not isinstance(self.estimate_config,Resources) and isinstance(self.estimate_config, dict):
            self.estimate_config = Resources(self.estimate_config)
            
        self.data = {}
        self.coefficient_names = {}
        ChunkModel.__init__(self)
        self.get_status_for_gui().initialize_pieces(3, pieces_description = array(['initialization', 'computing variables', 'submodel: 1']))
Example #14
    def get_class(self,
                  module_composed_name,
                  class_name=None,
                  arguments={},
                  debug=0):
        """
        'module_composed_name' is either a string or an instance of
        class ComposedName that represents the full name of the module in which the class
        given by 'class_name' is implemented. If 'class_name' is None, it is considered to have
        the same name as the module.
        'arguments' is a dictionary with names and values of arguments of the class constructor.
        It returns an object of the given class.
        """
        if module_composed_name == None: return None
        if isinstance(module_composed_name, str):
            module_c_name = ComposedName(module_composed_name)
        else:
            module_c_name = module_composed_name

        if class_name == None:
            class_name = module_c_name.get_short_name()

        if not isinstance(debug, DebugPrinter):
            debug = DebugPrinter(debug)

        ev = "from " + module_c_name.get_full_name() + " import " + class_name
        try:
            exec(ev)
        except ImportError:
            raise ImportError("Module named '%s' does not exist or could not "
                              "import class '%s' from it." %
                              (module_c_name.get_full_name(), class_name))
        return eval(class_name + "(**arguments)")
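
The method above boils down to a dynamic import followed by an eval of the constructor call. As a self-contained illustration of the same pattern, here is a sketch using importlib instead of exec/eval; it is an approximation for clarity, not the Opus implementation:

import importlib

def load_class(module_name, class_name=None, arguments=None):
    # Default the class name to the module's short name, mirroring get_class().
    if class_name is None:
        class_name = module_name.split(".")[-1]
    module = importlib.import_module(module_name)
    cls = getattr(module, class_name)
    # Instantiate with keyword arguments, like eval(class_name + "(**arguments)").
    return cls(**(arguments or {}))

# load_class("collections", "OrderedDict") returns an empty OrderedDict instance.
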
 def __init__(self, location_id_name=None, variable_package=None, dataset_pool=None, debuglevel=0):
     self.debug = DebugPrinter(debuglevel)
     self.location_id_name = self.location_id_name_default
     self.variable_package = self.variable_package_default
     if location_id_name is not None:
         self.location_id_name = location_id_name
     if variable_package is not None:
         self.variable_package = variable_package
     self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"])
Example #16
 def __init__(self,
              probabilities="urbansim.rate_based_probabilities",
              choices="opus_core.random_choices",
              location_id_name="grid_id",
              model_name="Agent Relocation Model",
              debuglevel=0,
              resources=None):
     self.model_name = model_name
     self.location_id_name = location_id_name
     self.debug = DebugPrinter(debuglevel)
     self.upc_sequence = None
     if probabilities is not None:
         self.upc_sequence = UPCFactory().get_model(
             utilities=None,
             probabilities=probabilities,
             choices=choices,
             debuglevel=debuglevel)
     self.resources = merge_resources_if_not_None(resources)
Example #17
 def __init__(self,
              utility_class=None,
              probability_class=None,
              choice_class=None,
              resources=None,
              debuglevel=0):
     """utility_class, probability_class, choice_class are objects of the corresponding classes.
         They must have a method 'run'.
     """
     self.utility_class = utility_class
     self.probability_class = probability_class
     self.choice_class = choice_class
     self.resources = resources
     if self.resources == None:
         self.resources = Resources()
     self.utilities = None
     self.probabilities = None
     self.choices = None
     self.debug = DebugPrinter(debuglevel)
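
The only contract the docstring above imposes on the three components is that each has a 'run' method. A minimal hedged sketch of conforming probability and choice components; the class names and run() signatures are illustrative assumptions, not the opus_core classes:

from numpy import array, argmax

class UniformProbabilities(object):
    """Assign equal probability to a fixed number of alternatives."""
    def run(self, nalternatives=4, *args, **kwargs):
        return array([1.0 / nalternatives] * nalternatives)

class GreedyChoices(object):
    """Pick the alternative with the highest probability."""
    def run(self, probabilities, *args, **kwargs):
        return argmax(probabilities)
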
    def __init__(self, resources=None, what="household", in_storage=None,
                 in_table_name=None, out_storage=None, out_table_name=None, 
                 id_name=None, nchunks=None, debuglevel=0):
        ## TODO remove "what" argument
        
        debug = DebugPrinter(debuglevel)
        debug.print_debug("Creating ControlTotalDataset object for "+what+".",2)
        
        if not self.in_table_name_default:
            self.in_table_name_default = "annual_" + what + "_control_totals"
        if not self.out_table_name_default:         
            self.out_table_name_default = "annual_" + what + "_control_totals"
            
        attributes_default = AttributeType.PRIMARY
        #dataset_name = "control_total"
        nchunks_default = 1

        resources = ResourceFactory().get_resources_for_dataset(
            self.dataset_name,
            resources=resources,
            in_storage=in_storage,
            out_storage=out_storage,
            in_table_name_pair=(in_table_name,self.in_table_name_default),
            attributes_pair=(None, attributes_default),
            out_table_name_pair=(out_table_name, self.out_table_name_default),
            id_name_pair=(id_name,self.id_name_default),
            nchunks_pair=(nchunks,nchunks_default),
            debug_pair=(debug,None)
            )
        
        table_name = resources["in_table_name"]
        if resources['id_name'] is None or len(resources['id_name'])== 0:
            #if both self.id_name_default and the id_name argument in __init__ are unspecified,
            #ControlTotalDataset uses all attributes not beginning with "total"
            #as id_name
            id_names = []
            column_names = resources["in_storage"].get_column_names(table_name)
            for column_name in column_names:
                if not re.search('^total', column_name):
                    id_names.append(column_name)
            resources.merge({"id_name":resources["id_name"] + id_names})

        Dataset.__init__(self, resources = resources)
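
A hedged construction sketch for the dataset above, using an in-memory dict_storage table; the column names are illustrative, and omitting id_name triggers the fallback described in the comment (every column not starting with "total" becomes part of the id):

from numpy import array
from opus_core.storage_factory import StorageFactory
# from urbansim.datasets.control_total_dataset import ControlTotalDataset  # assumed import path

storage = StorageFactory().get_storage('dict_storage')
storage.write_table(
    table_name="annual_household_control_totals",
    table_data={"year": array([2000, 2001]),
                "total_number_of_households": array([1500, 1600])})
control_totals = ControlTotalDataset(in_storage=storage, what="household")
# With id_name omitted, only 'year' survives the "total*" filter and is used as the id.
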
    def __init__(self,
                 regression_procedure="opus_core.linear_regression",
                 submodel_string=None,
                 run_config=None,
                 estimate_config=None,
                 debuglevel=0,
                 dataset_pool=None):

        self.debug = DebugPrinter(debuglevel)

        self.dataset_pool = self.create_dataset_pool(dataset_pool)

        self.regression = RegressionModelFactory().get_model(
            name=regression_procedure)
        if self.regression == None:
            raise StandardError("No regression procedure given.")

        self.submodel_string = submodel_string

        self.run_config = run_config
        if self.run_config == None:
            self.run_config = Resources()
        if not isinstance(self.run_config, Resources) and isinstance(
                self.run_config, dict):
            self.run_config = Resources(self.run_config)

        self.estimate_config = estimate_config
        if self.estimate_config == None:
            self.estimate_config = Resources()
        if not isinstance(self.estimate_config, Resources) and isinstance(
                self.estimate_config, dict):
            self.estimate_config = Resources(self.estimate_config)

        self.data = {}
        self.coefficient_names = {}
        ChunkModel.__init__(self)
        self.get_status_for_gui().initialize_pieces(3,
                                                    pieces_description=array([
                                                        'initialization',
                                                        'computing variables',
                                                        'submodel: 1'
                                                    ]))
Example #20
    def __init__(self,
                 resources=None,
                 dataset1=None,
                 dataset2=None,
                 index1=None,
                 index2=None,
                 debuglevel=0):
        debug = DebugPrinter(debuglevel)
        debug.print_debug(
            "Creating object %s.%s" %
            (self.__class__.__module__, self.__class__.__name__), 2)

        local_resources = Resources(resources)
        local_resources.merge_if_not_None({
            "dataset1": dataset1,
            "dataset2": dataset2,
            "debug": debug,
            "index1": index1,
            "index2": index2
        })
        CoreInteractionDataset.__init__(self, resources=local_resources)
Example #21
 def __init__(self, 
         resources=None, 
         data=None, 
         names=None, 
         in_storage=None, 
         out_storage=None,
         in_table_name=None, 
         out_table_name=None,
         attributes=None, 
         id_name=None,
         nchunks=None, 
         debuglevel=0
         ):
     debug = DebugPrinter(debuglevel)
     debug.print_debug("Creating LandUseDevelopmentDataset object.", 2 )
     
     dataset_name = "landuse_development"
     nchunks_default = 1
     
     if data is not None:
         in_storage = StorageFactory().get_storage('dict_storage')
         
         in_storage.write_table(table_name='landuse_developments', table_data=data)
         in_table_name='landuse_developments'
     
     resources = ResourceFactory().get_resources_for_dataset(
             dataset_name, 
             in_storage=in_storage, 
             out_storage=out_storage,
             resources=resources, 
             in_table_name_pair=(in_table_name,None), 
             out_table_name_pair=(out_table_name, None), 
             id_name_pair=(id_name,self.id_name_default), 
             nchunks_pair=(nchunks,nchunks_default), 
             debug_pair=(debug,None)
             )
         
     Dataset.__init__(self, resources = resources)
    def __init__(self,
            resources=None,
            in_storage=None,
            out_storage=None,
            in_table_name=None,
            out_table_name=None,
            in_table_name_groups=None,
            other_in_table_names=None,
            attributes=None,
            use_groups=True,
            id_name=None,
            nchunks=None,
            debuglevel=0
            ):
        debug = DebugPrinter(debuglevel)
        debug.print_debug("Creating DevelopmentTypeDataset object.",2)
        resources = self._get_resources_for_dataset(
            in_table_name_default="development_types",
            in_table_name_groups_default="development_type_group_definitions",
            out_table_name_default="development_types",
            dataset_name="development_type",                                        
            resources = resources,
            in_storage = in_storage,
            out_storage = out_storage,
            in_table_name = in_table_name,
            out_table_name = out_table_name,
            in_table_name_groups = in_table_name_groups,
            attributes = attributes,
            id_name = id_name,
            id_name_default = self.id_name_default,
            debug = debug,
            )

        AbstractGroupDataset.__init__(self,
            resources=resources,
            other_in_table_names=other_in_table_names, 
            use_groups=use_groups
            )
Example #23
 def __init__(self, utility_class=None, probability_class=None, choice_class=None, resources=None, debuglevel=0):
     """utility_class, probability_class, choice_class are objects of the corresponding classes.
         They must have a method 'run'.
     """
     self.utility_class = utility_class
     self.probability_class = probability_class
     self.choice_class = choice_class
     self.resources = resources
     if self.resources == None:
         self.resources = Resources()
     self.utilities = None
     self.probabilities = None
     self.choices = None
     self.debug = DebugPrinter(debuglevel)
    def __init__(self,
                 resources=None,
                 data=None,
                 names=None,
                 in_storage=None,
                 out_storage=None,
                 in_table_name=None,
                 out_table_name=None,
                 attributes=None,
                 id_name=None,
                 nchunks=None,
                 debuglevel=0):
        debug = DebugPrinter(debuglevel)
        debug.print_debug("Creating LandUseDevelopmentDataset object.", 2)

        dataset_name = "landuse_development"
        nchunks_default = 1

        if data is not None:
            in_storage = StorageFactory().get_storage('dict_storage')

            in_storage.write_table(table_name='landuse_developments',
                                   table_data=data)
            in_table_name = 'landuse_developments'

        resources = ResourceFactory().get_resources_for_dataset(
            dataset_name,
            in_storage=in_storage,
            out_storage=out_storage,
            resources=resources,
            in_table_name_pair=(in_table_name, None),
            out_table_name_pair=(out_table_name, None),
            id_name_pair=(id_name, self.id_name_default),
            nchunks_pair=(nchunks, nchunks_default),
            debug_pair=(debug, None))

        Dataset.__init__(self, resources=resources)
 def __init__(self,
              resources=None,
              dataset1=None,
              dataset2=None,
              index1=None,
              index2=None,
              dataset_name=None,
              debug=None):
     """ Argument 'resources' is of type Resources. It is merged with arguments. It should contain:
             dataset1 - agent class
             dataset2 - class of the choice dataset
         Optional:
             index1 - 1D array, indices of dataset1
             index2 - If 2D array: row i contains indices of individuals of dataset2 that belong to
                     the i-th individual of dataset1[index1].
                     If 1D array: indices of individuals of dataset2 for all individuals of dataset1[index1].
             dataset_name - subdirectory in which the implementation of the interaction variables is placed (default "")
         dataset1.resources and dataset2.resources should contain key 'dataset_name' (see Dataset.get_dataset_name()).
     """
     self.resources = Resources(resources)
     self.resources.merge_if_not_None({
         "dataset1": dataset1,
         "dataset2": dataset2,
         "index1": index1,
         "index2": index2,
         "dataset_name": dataset_name,
         "debug": debug
     })
     self.attribute_boxes = {}
     self.attribute_names = []
     self.debug = self.resources.get("debug", 0)
     if not isinstance(self.debug, DebugPrinter):
         self.debug = DebugPrinter(self.debug)
     self.resources.check_obligatory_keys(["dataset1", "dataset2"])
     self.dataset1 = self.resources["dataset1"]
     self.dataset2 = self.resources["dataset2"]
     self.index1 = self.resources.get("index1", None)
     self.index2 = self.resources.get("index2", None)
     self.dataset_name = self.resources.get("dataset_name", None)
     if self.dataset_name == None:
         self.dataset_name = self.dataset1.get_dataset_name(
         ) + '_x_' + self.dataset2.get_dataset_name()
     self._primary_attribute_names = []
     self.index1_mapping = {}
     if self.index1 is not None:
         self.index1_mapping = do_id_mapping_dict_from_array(self.index1)
     self._id_names = None  # for compatibility with Dataset
     self.variable_factory = VariableFactory()
     self._aliases = {}  # for compatibility with Dataset
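
A hedged sketch of how this constructor is typically driven; it assumes 'households' and 'gridcells' are already-built Dataset objects whose resources carry a 'dataset_name' key, and that the class above is the opus_core InteractionDataset (an assumption based on the signature), so the sketch is illustrative rather than runnable on its own:

from numpy import arange, array

interactions = InteractionDataset(
    dataset1=households,                 # agent dataset (assumed to exist)
    dataset2=gridcells,                  # choice dataset (assumed to exist)
    index1=arange(10),                   # first ten agents
    index2=array([[3, 7, 42]] * 10))     # 2D: the same three sampled choices for each agent
# With dataset names 'household' and 'gridcell', dataset_name defaults to 'household_x_gridcell'.
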
    def __init__(self,
                 group_member=None,
                 agents_grouping_attribute='job.building_type',
                 filter=None,
                 model_name=None,
                 model_short_name=None,
                 variable_package=None,
                 dataset_pool=None,
                 debuglevel=0):
        self.group_member = group_member
        if self.group_member:
            self.group_member.set_agents_grouping_attribute(
                agents_grouping_attribute)
        self.filter = filter
        self.dataset_pool = self.create_dataset_pool(dataset_pool,
                                                     ["urbansim", "opus_core"])
        self.debug = DebugPrinter(debuglevel)

        if model_name is not None:
            self.model_name = model_name
        if model_short_name is not None:
            self.model_short_name = model_short_name
        if variable_package is not None:
            self.variable_package = variable_package
Example #27
 def __init__(self,
              probabilities = "opus_core.upc.rate_based_probabilities",
              choices = "opus_core.random_choices",
              model_name = None,
              debuglevel=0,
              resources=None
              ):
     if model_name is not None:
         self.model_name = model_name
     self.debug = DebugPrinter(debuglevel)
     self.upc_sequence = None
     if probabilities is not None:
         self.upc_sequence = UPCFactory().get_model(utilities=None,
                                                    probabilities=probabilities,
                                                    choices=choices,
                                                    debuglevel=debuglevel)
     self.resources = merge_resources_if_not_None(resources)
 def __init__(self,
              probabilities = "urbansim.rate_based_probabilities",
              choices = "opus_core.random_choices",
              location_id_name="grid_id",
              model_name = "Agent Relocation Model",
              debuglevel=0,
              resources=None
              ):
     self.model_name = model_name
     self.location_id_name = location_id_name
     self.debug = DebugPrinter(debuglevel)
     self.upc_sequence = None
     if probabilities is not None:
         self.upc_sequence = UPCFactory().get_model(utilities=None,
                                                    probabilities=probabilities,
                                                    choices=choices,
                                                    debuglevel=debuglevel)
     self.resources = merge_resources_if_not_None(resources)
Example #29
 def __init__(self, group_member=None, 
              agents_grouping_attribute = 'job.building_type', 
              filter = None, 
              model_name=None,
              model_short_name=None,
              variable_package=None,
              dataset_pool=None,
              debuglevel=0):
     self.group_member = group_member
     if self.group_member:
         self.group_member.set_agents_grouping_attribute(agents_grouping_attribute)
     self.filter = filter
     self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"])
     self.debug = DebugPrinter(debuglevel)
     
     if model_name is not None:
         self.model_name = model_name
     if model_short_name is not None:
         self.model_short_name = model_short_name
     if variable_package is not None:
         self.variable_package = variable_package
Example #30
    def __init__(self,
                 resources=None,
                 in_storage=None,
                 out_storage=None,
                 in_table_name=None,
                 out_table_name=None,
                 attributes=None,
                 id_name=None,
                 nchunks=None,
                 debuglevel=0):
        try:
            debug = SessionConfiguration().get('debuglevel', 0)
        except:
            debug = 0
        debug = DebugPrinter(debug)
        if debuglevel > debug.flag:
            debug.flag = debuglevel

        debug.print_debug(
            "Creating object %s.%s" %
            (self.__class__.__module__, self.__class__.__name__), 2)

        resources = ResourceFactory().get_resources_for_dataset(
            self.dataset_name,
            resources=resources,
            in_storage=in_storage,
            in_table_name_pair=(in_table_name, self.in_table_name_default),
            attributes_pair=(attributes, self.attributes_default),
            out_storage=out_storage,
            out_table_name_pair=(out_table_name, self.out_table_name_default),
            id_name_pair=(id_name, self.id_name_default),
            nchunks_pair=(nchunks, self.nchunks_default),
            debug_pair=(debug, None),
        )

        CoreDataset.__init__(self, resources=resources)
Example #31
 def __init__(self, debuglevel=0):
     self.debug = DebugPrinter(debuglevel)
     self.model_name = "Building Transition Model"
class BusinessTransitionModel(Model):
    """Creates and removes businesses from business_set."""

    model_name = "Business Transition Model"
    location_id_name = "building_id"
    variable_package = "urbansim_parcel"

    def __init__(self, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)

    def run(self, year, business_set,
            control_totals,
            data_objects=None,
            resources=None):
        business_id_name = business_set.get_id_name()[0]
        control_totals.get_attribute("total_number_of_businesses")
        idx = where(control_totals.get_attribute("year")==year)
        sectors = unique(control_totals.get_attribute_by_index("building_use_id", idx))
        max_id = business_set.get_id_attribute().max()
        business_size = business_set.size()
        new_businesses = {self.location_id_name:array([], dtype='int32'),
                          "building_use_id":array([], dtype='int32'),
                          business_id_name:array([], dtype='int32'),
                          "sqft":array([], dtype=int32),
                          "employees":array([], dtype=int32),}
        compute_resources = Resources(data_objects)
#        compute_resources.merge({job_building_types.get_dataset_name():job_building_types, "debug":self.debug})
        business_set.compute_variables(
            map(lambda x: "%s.%s.is_sector_%s"
                    % (self.variable_package, business_set.get_dataset_name(), x),
                sectors),
            resources = compute_resources)
        remove_businesses = array([], dtype='int32')

        for sector in sectors:
            total_businesses = control_totals.get_data_element_by_id((year,sector)).total_number_of_businesses
            is_in_sector = business_set.get_attribute("is_sector_%s" % sector)
            diff = int(total_businesses - is_in_sector.astype(int8).sum())

            if diff < 0: # businesses to be removed from this sector
                w = where(is_in_sector == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(business_set, w, -1*diff,
                                                         self.location_id_name)
                remove_businesses = concatenate((remove_businesses, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff)-size_non_placed))))

            if diff > 0: # new businesses to be created for this sector
                new_businesses[self.location_id_name]=concatenate((new_businesses[self.location_id_name],zeros((diff,), dtype="int32")))
                new_businesses["building_use_id"]=concatenate((new_businesses["building_use_id"],
                                                               sector*ones((diff,), dtype="int32")))

                available_business_index = where(is_in_sector)[0]
                sampled_business = probsample_replace(available_business_index, diff, None)

                new_businesses["sqft"] = concatenate((new_businesses["sqft"],
                                                     business_set.get_attribute("sqft")[sampled_business]))
                new_businesses["employees"] = concatenate((new_businesses["employees"],
                                                           business_set.get_attribute("employees")[sampled_business]))

                new_max_id = max_id+diff
                new_businesses[business_id_name]=concatenate((new_businesses[business_id_name], arange(max_id+1, new_max_id+1)))
                max_id = new_max_id

        business_set.remove_elements(remove_businesses)
        business_set.add_elements(new_businesses, require_all_attributes=False)
        difference = business_set.size()-business_size
        self.debug.print_debug("Difference in number of businesses: %s (original %s,"
            " new %s, created %s, deleted %s)"
                % (difference,
                   business_size,
                   business_set.size(),
                   new_businesses[business_id_name].size,
                   remove_businesses.size),
            3)
        self.debug.print_debug("Number of unplaced businesses: %s"
            % where(business_set.get_attribute(self.location_id_name) <=0)[0].size,
            3)
        return difference

    def prepare_for_run(self, storage, in_table_name, id_name, **kwargs):
        from urbansim.datasets.control_total_dataset import ControlTotalDataset
        control_totals = ControlTotalDataset(in_storage=storage,
                                             in_table_name=in_table_name,
                                             id_name=id_name
                                         )
#        sample_control_totals(storage, control_totals, **kwargs)
        return control_totals
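
A hedged sketch of wiring the model above together with prepare_for_run; the control-total table contents, its name, and the business_set object are illustrative assumptions for demonstration:

from numpy import array
from opus_core.storage_factory import StorageFactory

storage = StorageFactory().get_storage('dict_storage')
storage.write_table(
    table_name="annual_business_control_totals",
    table_data={"year": array([2010, 2010]),
                "building_use_id": array([1, 2]),
                "total_number_of_businesses": array([50, 75])})
model = BusinessTransitionModel(debuglevel=1)
control_totals = model.prepare_for_run(storage,
                                       in_table_name="annual_business_control_totals",
                                       id_name=["year", "building_use_id"])
# difference = model.run(2010, business_set, control_totals)   # business_set: built elsewhere
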
 def __init__(self, resources=None, debuglevel=0):
     self.debug = DebugPrinter(debuglevel)
     self.resources = resources
     self.model_name = "Development Event Transition Model"
 def __init__(self, debuglevel=0):
     self.debug = DebugPrinter(debuglevel)
class EmploymentTransitionModel(Model):
    """Creates and removes jobs from job_set."""

    model_name = "Employment Transition Model"
    location_id_name_default = "grid_id"
    variable_package_default = "urbansim"

    def __init__(self,
                 location_id_name=None,
                 variable_package=None,
                 dataset_pool=None,
                 debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = self.location_id_name_default
        self.variable_package = self.variable_package_default
        if location_id_name is not None:
            self.location_id_name = location_id_name
        if variable_package is not None:
            self.variable_package = variable_package
        self.dataset_pool = self.create_dataset_pool(dataset_pool,
                                                     ["urbansim", "opus_core"])

    def run(self,
            year,
            job_set,
            control_totals,
            job_building_types,
            data_objects=None,
            resources=None):
        self._do_initialize_for_run(job_set, job_building_types, data_objects)
        idx = where(control_totals.get_attribute("year") == year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(job_set)
        return self._update_job_set(job_set)

    def _do_initialize_for_run(self,
                               job_set,
                               job_building_types,
                               data_objects=None):
        self.max_id = job_set.get_id_attribute().max()
        self.job_size = job_set.size()
        self.job_id_name = job_set.get_id_name()[0]
        self.new_jobs = {
            self.location_id_name:
            array([],
                  dtype=job_set.get_data_type(self.location_id_name, int32)),
            "sector_id":
            array([], dtype=job_set.get_data_type("sector_id", int32)),
            self.job_id_name:
            array([], dtype=job_set.get_data_type(self.job_id_name, int32)),
            "building_type":
            array([], dtype=job_set.get_data_type("building_type", int8))
        }
        self.remove_jobs = array([], dtype=int32)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included(
            {job_building_types.get_dataset_name(): job_building_types})
        self.available_building_types = job_building_types.get_id_attribute()

    def _compute_sector_variables(self, sectors, job_set):
        compute_resources = Resources({"debug": self.debug})
        job_set.compute_variables(map(
            lambda x: "%s.%s.is_in_employment_sector_%s_home_based" %
            (self.variable_package, job_set.get_dataset_name(), x),
            sectors) + map(
                lambda x: "%s.%s.is_in_employment_sector_%s_non_home_based" %
                (self.variable_package, job_set.get_dataset_name(), x),
                sectors) + ["is_non_home_based_job", "is_home_based_job"],
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)

    def _do_run_for_this_year(self, job_set):
        building_type = job_set.get_attribute("building_type")
        sectors = unique(
            self.control_totals_for_this_year.get_attribute("sector_id"))
        self._compute_sector_variables(sectors, job_set)
        for sector in sectors:
            isector = where(
                self.control_totals_for_this_year.get_attribute("sector_id") ==
                sector)[0]
            total_hb_jobs = self.control_totals_for_this_year.get_attribute(
                "total_home_based_employment")[isector]
            total_nhb_jobs = self.control_totals_for_this_year.get_attribute(
                "total_non_home_based_employment")[isector]
            is_in_sector_hb = job_set.get_attribute(
                "is_in_employment_sector_%s_home_based" % sector)
            is_in_sector_nhb = job_set.get_attribute(
                "is_in_employment_sector_%s_non_home_based" % sector)
            diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum())
            diff_nhb = int(total_nhb_jobs -
                           is_in_sector_nhb.astype(int8).sum())
            if diff_hb < 0:  # home based jobs to be removed
                w = where(is_in_sector_hb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_hb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate(
                    (self.remove_jobs, non_placed,
                     sample_noreplace(sample_array,
                                      max(0,
                                          abs(diff_hb) - size_non_placed))))
            if diff_nhb < 0:  # non home based jobs to be removed
                w = where(is_in_sector_nhb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_nhb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate(
                    (self.remove_jobs, non_placed,
                     sample_noreplace(sample_array,
                                      max(0,
                                          abs(diff_nhb) - size_non_placed))))

            if diff_hb > 0:  # home based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate(
                    (self.new_jobs[self.location_id_name],
                     zeros(
                         (diff_hb, ),
                         dtype=self.new_jobs[self.location_id_name].dtype.type)
                     ))
                self.new_jobs["sector_id"] = concatenate(
                    (self.new_jobs["sector_id"], (resize(
                        array([sector],
                              dtype=self.new_jobs["sector_id"].dtype.type),
                        diff_hb))))
                if 1 in is_in_sector_hb:
                    building_type_distribution = array(
                        ndimage_sum(is_in_sector_hb,
                                    labels=building_type,
                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute(
                        "is_home_based_job"
                ):  # take the building type distribution from the whole region
                    building_type_distribution = array(
                        ndimage_sum(job_set.get_attribute("is_home_based_job"),
                                    labels=building_type,
                                    index=self.available_building_types))
                else:  # there are no home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(
                        self.available_building_types.size)
                    building_type_distribution = building_type_distribution / building_type_distribution.sum(
                    )
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_hb,
                    building_type_distribution /
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate(
                    (self.new_jobs["building_type"],
                     sampled_building_types.astype(
                         self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_hb
                self.new_jobs[self.job_id_name] = concatenate(
                    (self.new_jobs[self.job_id_name],
                     arange(self.max_id + 1, new_max_id + 1)))
                self.max_id = new_max_id

            if diff_nhb > 0:  # non home based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate(
                    (self.new_jobs[self.location_id_name],
                     zeros(
                         (diff_nhb, ),
                         dtype=self.new_jobs[self.location_id_name].dtype.type)
                     ))
                self.new_jobs["sector_id"] = concatenate(
                    (self.new_jobs["sector_id"], (resize(
                        array([sector],
                              dtype=self.new_jobs["sector_id"].dtype.type),
                        diff_nhb))))
                if 1 in is_in_sector_nhb:
                    building_type_distribution = array(
                        ndimage_sum(is_in_sector_nhb,
                                    labels=building_type,
                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute(
                        "is_non_home_based_job"
                ):  # take the building type distribution from the whole region
                    building_type_distribution = array(
                        ndimage_sum(
                            job_set.get_attribute("is_non_home_based_job"),
                            labels=building_type,
                            index=self.available_building_types))
                else:  # there are no non-home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(
                        self.available_building_types.size)
                    building_type_distribution = building_type_distribution / building_type_distribution.sum(
                    )
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_nhb,
                    building_type_distribution /
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate(
                    (self.new_jobs["building_type"],
                     sampled_building_types.astype(
                         self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_nhb
                self.new_jobs[self.job_id_name] = concatenate(
                    (self.new_jobs[self.job_id_name],
                     arange(self.max_id + 1, new_max_id + 1)))
                self.max_id = new_max_id

    def _update_job_set(self, job_set):
        job_set.remove_elements(self.remove_jobs)
        job_set.add_elements(self.new_jobs, require_all_attributes=False)
        difference = job_set.size() - self.job_size
        self.debug.print_debug(
            "Difference in number of jobs: %s (original %s,"
            " new %s, created %s, deleted %s)" %
            (difference, self.job_size, job_set.size(),
             self.new_jobs[self.job_id_name].size, self.remove_jobs.size), 3)
        self.debug.print_debug(
            "Number of unplaced jobs: %s" %
            where(job_set.get_attribute(self.location_id_name) <= 0)[0].size,
            3)
        return difference

    def prepare_for_run(self, storage, **kwargs):
        from urbansim.datasets.control_total_dataset import ControlTotalDataset
        control_totals = ControlTotalDataset(in_storage=storage,
                                             what="employment")
        sample_control_totals(storage, control_totals, **kwargs)
        return control_totals
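
A hedged driving sketch for the model above; the storage holding the employment control totals, the job_set, and the job_building_types datasets are assumed to exist elsewhere, so most calls are shown as comments:

model = EmploymentTransitionModel(location_id_name="grid_id")
# prepare_for_run builds a ControlTotalDataset with what="employment" from 'storage'
# control_totals = model.prepare_for_run(storage)
# difference = model.run(2005, job_set, control_totals, job_building_types)
# 'difference' is the net change in the number of jobs after applying the control totals.
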
Example #36
class ScalingJobsModel(Model):
    """This model is used to place new jobs that are in specific employment sectors, 
    such as military and education, do not tend to create new business locations or move 
    existing business locations. It relocates given jobs according to the distribution of 
    the scalable jobs of different sectors.
    """
    model_name = "Scaling Jobs Model"
    model_short_name = "SJM"
    variable_package = "urbansim"
    
    def __init__(self, group_member=None, 
                 agents_grouping_attribute = 'job.building_type', 
                 filter = None, 
                 model_name=None,
                 model_short_name=None,
                 variable_package=None,
                 dataset_pool=None,
                 debuglevel=0):
        self.group_member = group_member
        if self.group_member:
            self.group_member.set_agents_grouping_attribute(agents_grouping_attribute)
        self.filter = filter
        self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"])
        self.debug = DebugPrinter(debuglevel)
        
        if model_name is not None:
            self.model_name = model_name
        if model_short_name is not None:
            self.model_short_name = model_short_name
        if variable_package is not None:
            self.variable_package = variable_package
     
    def run(self, location_set, agent_set, agents_index=None, data_objects=None,
            resources=None, **kwargs):
        """
            'location_set' and 'agent_set' are of type Dataset.
            'agents_index' are indices of individuals in the agent_set for which
            the model runs. If it is None, the whole agent_set is considered.
            'data_objects' is a dictionary where each key is the name of a data object
            ('zone', ...) and its value is an object of class Dataset.
        """
        if isinstance(agents_index,list):
            agents_index=array(agents_index)
        if agents_index is None:
            agents_index=arange(agent_set.size())
        if self.group_member:
            new_agents_index = self.group_member.get_index_of_my_agents(agent_set, agents_index)
        else:
            new_agents_index = arange(agents_index.size)
        self.debug.print_debug("Number of scalable jobs: " + str(agents_index[new_agents_index].size),2)
        choices = self._do_run(location_set, agent_set, agents_index[new_agents_index], data_objects, resources)
        result = resize(array([-1], dtype=choices.dtype), agents_index.size)
        result[new_agents_index] = choices
        return result
    
    def _do_run(self, location_set, agent_set, agents_index, data_objects=None, resources=None):
        location_id_name = location_set.get_id_name()[0]
        jobsubset = DatasetSubset(agent_set, agents_index)
        if jobsubset.size() <= 0:
            return array([], dtype='int32')
        #unplace jobs
        agent_set.set_values_of_one_attribute(location_id_name, 
                                              resize(array([-1.0]), jobsubset.size()), agents_index)
        sector_ids = jobsubset.get_attribute("sector_id")
        sectors = unique(sector_ids)
        counts = ndimage_sum(ones((jobsubset.size(),)), labels=sector_ids.astype('int32'), index=sectors.astype('int32'))
        if sectors.size <=1 :
            counts = array([counts])
        variables = map(lambda x: "number_of_jobs_of_sector_"+str(int(x)), sectors)
        compute_variables = map(lambda var: self.variable_package + "." + 
            location_set.get_dataset_name()+ "." + var, variables)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included({agent_set.get_dataset_name():agent_set})
        location_set.compute_variables(compute_variables, dataset_pool=self.dataset_pool)
        if self.filter is None:
            location_index = arange(location_set.size())
        else:
            filter_values = location_set.compute_variables([self.filter], dataset_pool=self.dataset_pool)
            location_index = where(filter_values > 0)[0]
        if location_index.size <= 0:
            logger.log_status("No locations available. Nothing to be done.")
            return array([])
        location_subset = DatasetSubset(location_set, location_index)
        i=0
        for sector in sectors:
            distr = location_subset.get_attribute(variables[i])
            if ma.allclose(distr.sum(), 0):
                uniform_prob = 1.0/distr.size
                distr = resize(array([uniform_prob], dtype='float64'), distr.size)
                logger.log_warning("Probabilities in scaling model for sector " + str(sector) + " sum to 0.0.  Substituting uniform distribution!")
#                random_sample = sample(location_set.get_attribute("grid_id"), k=int(counts[i]), \
#                                   probabilities = distr)
            distr = distr/float(distr.sum())
            random_sample = probsample_replace(location_subset.get_id_attribute(), size=int(counts[i]), 
                                       prob_array=distr)
            idx = where(sector_ids == sector)[0]
            #modify job locations
            agent_set.set_values_of_one_attribute(location_id_name, random_sample, agents_index[idx])
            i+=1
        return agent_set.get_attribute_by_index(location_id_name, agents_index)
 
    def prepare_for_run(self, agent_set=None, agents_filter=None, agents_index=None):
        if agent_set is None or agents_filter is None:
            return agents_index
        filter = agent_set.compute_variables([agents_filter], dataset_pool=self.dataset_pool)
        if agents_index is not None:
            # restrict the filter to the given subset of agents
            tmp = zeros(agent_set.size(), dtype='bool8')
            tmp[agents_index] = True
            filter = logical_and(filter, tmp)
        return where(filter)[0]
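# A minimal, numpy-only sketch of the per-sector scaling step in _do_run above:
# jobs of one sector are placed into locations with probability proportional to
# the existing number of jobs of that sector per location. The numbers below are
# hypothetical; the real model reads the distribution from location_set
# attributes and samples with probsample_replace.
from numpy import array, cumsum, searchsorted
from numpy.random import random

existing_jobs_per_location = array([10., 0., 30., 60.])   # one sector, 4 locations
distr = existing_jobs_per_location / existing_jobs_per_location.sum()
n_new_jobs = 5
# inverse-CDF sampling with replacement, equivalent in spirit to
# probsample_replace(location_ids, size=n_new_jobs, prob_array=distr)
chosen_location_index = searchsorted(cumsum(distr), random(n_new_jobs))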
Example #37
 def get_variable(self, variable_name, dataset, quiet=False, debug=0, index_name=None):
     """Returns an instance of class Variable. 
     'variable_name' is an instance of class VariableName. 
     'dataset' is an object of class Dataset to which the variable belongs. 
     In case of an error in either importing the module or evaluating its constructor, 
     the method returns None.
     If quiet is True no warnings are printed.
     index_name is used for lag variables only.
     """
     lag_attribute_name = None
     lag_offset = 0
         
     if not isinstance(debug, DebugPrinter):
         debug = DebugPrinter(debug)
         
     if variable_name.get_autogen_class() is not None:
         # variable_name has an autogenerated class -- just use that
         variable_subclass = variable_name.get_autogen_class()
         substrings = ()
     else:
         # either find the variable name in the expression library (if present), in an appropriate 'aliases' file, 
         # or load our variable class as 'variable_subclass' using an import statement
         short_name = variable_name.get_short_name()
         dataset_name = variable_name.get_dataset_name()
         package_name = variable_name.get_package_name()
         # if there isn't a package name, first look in the expression library (if there is a package name, look elsewhere)
         if package_name is None:
             e = VariableFactory._expression_library.get( (dataset_name,short_name), None)
             if e is not None:
                 if e == variable_name.get_expression(): # it is a primary attribute
                     return None
                 v = VariableName(e)
                 return VariableFactory().get_variable(v, dataset, quiet=quiet, debug=debug)
         else:
             # not in the expression library - next look in the appropriate 'aliases' file, if one is present
             # (but only if we have a package name in the first place)
             try:
                 stmt = 'from %s.%s.aliases import aliases' % (package_name, dataset_name)
                 exec(stmt)
             except ImportError:
                 aliases = []
             for a in aliases:
                 # for each definition, see if the alias is equal to the short_name.  If it is,
                 # then use that definition for the variable
                 v = VariableName(a)
                 if v.get_alias() == short_name:
                     return VariableFactory().get_variable(v, dataset, quiet=quiet, debug=debug)
         
         lag_variable_parser = LagVariableParser()
         if lag_variable_parser.is_short_name_for_lag_variable(short_name):
             lag_attribute_name, lag_offset = lag_variable_parser.parse_lag_variable_short_name(short_name)
             true_short_name = "VVV_lagLLL"
             substrings = (package_name, lag_attribute_name, lag_offset, dataset_name, index_name)
             opus_path = 'opus_core.variables'
             
         else:      
             if package_name is None:
                 raise LookupError("Incomplete variable specification for '%s.%s' (missing package name, "
                                   "and variable is not in expression library not a lag variable)." 
                                   % (dataset_name, short_name))
             
             opus_path = '%s.%s' % (package_name,dataset_name)
                 
             true_short_name, substrings = VariableFamilyNameTranslator().\
                     get_translated_variable_name_and_substring_arguments(opus_path, short_name)
             
         module = '%s.%s' % (opus_path, true_short_name)
         
         # Note that simply checking for the .py module file would not
         # be safe here, as objects could be instantiated in __init__.py files.
         try:
             ev = "from %s import %s as variable_subclass" % (module, true_short_name)
             debug.print_debug("Evaluating '" + ev + "'.",12)
             exec(ev)
             debug.print_debug("Successful.", 12)
         except ImportError, e:
             if not quiet:
                 from opus_core.simulation_state import SimulationState
                 time = SimulationState().get_current_time()
                 desc = '\n'.join(("Opus variable '%s' does not exist for dataset '%s' in year %s. "
                                   "The following error occured when finally trying to import "
                                   "the variable '%s' from the Python module "
                                   "'%s':",
                                   "%s",
                                  )) % (true_short_name, opus_path, time,
                                        true_short_name,
                                        module,
                                        indent_text(formatPlainTextExceptionInfoWithoutLog('')))
                 raise NameError(desc)
             return None
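# A hedged sketch of the 'aliases' lookup used by get_variable() above: the module
# <package>.<dataset>.aliases is expected to define a plain list named 'aliases'
# of "alias = expression" strings. The entry and the requested name below are
# illustrative only, not taken from any real package.
aliases = [
    "total_population = zone.aggregate(household.persons)",
]
requested_short_name = "total_population"          # hypothetical request
for a in aliases:
    alias_part = a.split("=", 1)[0].strip()
    if alias_part == requested_short_name:
        # get_variable() builds VariableName(a) here and resolves the right-hand
        # side expression recursively instead of importing a variable module
        print "matched alias, will resolve:", a.split("=", 1)[1].strip()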
Example #38
class Variable(object):
    """Abstract base class for variables. Each variable implementation must be 
    a subclass of this class, placed in a module that has the same name 
    as the variable class. Each variable class is expected to contain a method "compute" 
    that takes one argument "arguments". It is of type Resources and can contain 
    anything that the compute method might need. 
    The 'compute' method  returns a result of the computation which should be 
    an array of size self.get_dataset().size().
    
    Each variable class can contain a method "dependencies" which returns a list 
    of attributes/variables that this class is dependent on.  The dependencies list 
    is a list of fully (or dataset) qualified variable names, one for each 
    dependent variable. All dependent datasets must be included in 'arguments'.    
    
    Each variable may have a pre- and post-check that will perform checks on the
    variable's inputs and the variable's results.  This allows each variable's
    implementation to specify a contract about what it does.  
    
    The 'check_variables' entry of the 'arguments' defines what variables to check
    (see method 'should_check'). If a variable is required to be checked, the 
    'pre_check' method for that variable is called before the variable's 'compute' 
    method, and the 'post_check' method for that variable is called after the 
    variable's 'compute' method. 'pre_check' takes the dataset pool as its argument;
    'post_check' takes the computed values and the dataset pool.
    
    When 'compute_with_dependencies' is used, the datasets for which variables 
    are computed are expected to have a method 'compute_variables' that 
    takes at least three arguments: name of the variable, package name and 
    an object of class Resources. This dataset method should
    use the Variable method 'compute_with_dependencies' in order to work recursively 
    through dependency trees (see compute_variables and _compute_one_variable of 
    opus_core.Dataset).
    
    The return type of this variable is defined by its _return_type property, which
    may have one of the following numpy types: "bool8", "int8", "uint8", "int16", 
    "uint16", "int32", "uint32", "int64", "uint64", "float32", "float64", "complex64",
    "complex128", "longlong".
    
    (A minimal example subclass is sketched right after this class definition.)
    """
    _return_type = None

    def __new__(cls, *args, **kwargs):
        """Setup to automatically log the running time of the compute method."""

        an_instance = object.__new__(cls)
        compute_method = an_instance.compute_with_dependencies

        def logged_method(*req_args, **opt_args):
            logger.start_block(name=an_instance.name(), verbose=False)
            try:
                results = compute_method(*req_args, **opt_args)
                an_instance._do_flush_dependent_variables_if_required()
            finally:
                logger.end_block()
            return results

        an_instance.compute_with_dependencies = logged_method
        return an_instance

    def __init__(self):
        self.dependencies_list = None
        self.dataset = None
        self.number_of_compute_runs = 0
        try:
            self.debug = SessionConfiguration().get('debuglevel', 0)
        except:
            self.debug = 0
        if isinstance(self.debug, int):
            self.debug = DebugPrinter(self.debug)

    def name(self):
        return self.__module__

    def _do_flush_dependent_variables_if_required(self):
        try:
            if not SessionConfiguration().get('flush_variables', False):
                return
        except:
            return
        from opus_core.datasets.interaction_dataset import InteractionDataset
        dataset = self.get_dataset()
        dependencies = self.get_current_dependencies()
        my_dataset_name = dataset.get_dataset_name()
        for iattr in range(
                len(dependencies)):  # iterate over dependent variables
            dep_item = dependencies[iattr][0]
            if isinstance(dep_item, str):
                depvar_name = VariableName(dep_item)
            else:
                depvar_name = dep_item.get_variable_name(
                )  # dep_item should be an instance of AttributeBox
            dataset_name = depvar_name.get_dataset_name()
            if dataset_name == my_dataset_name:
                ds = dataset
            else:
                ds = SessionConfiguration().get_dataset_from_pool(dataset_name)
                #ds = dataset_pool.get_dataset('dataset_name')
            if not isinstance(ds, InteractionDataset):
                short_name = depvar_name.get_alias()
                if short_name not in ds.get_id_name():
                    ds.flush_attribute(depvar_name)

    def compute(self, dataset_pool):
        """Returns the result of this variable.  Private use only."""
        raise NotImplementedError, "compute() method not implemented for this variable."

    def is_lag_variable(self):
        """Not a lag variable unless this function has been overridden to return True"""
        return False

    def _compute_and_check(self, dataset_pool):
        if has_this_method(self, "pre_check"):
            self.debug.print_debug(
                "Running pre_check() for " + self.__class__.__module__, 4)
            self.pre_check(dataset_pool)
        else:
            self.debug.print_debug(
                "No pre_check() defined for " + self.__class__.__module__, 4)
        values = self.compute(dataset_pool)
        if has_this_method(self, "post_check"):
            self.debug.print_debug(
                "Running post_check() for " + self.__class__.__module__, 4)
            self.post_check(values, dataset_pool)
        else:
            self.debug.print_debug(
                "No post_check() defined for " + self.__class__.__module__, 4)
        return values

    def compute_with_dependencies(self, dataset_pool, arguments={}):
        self._solve_dependencies(dataset_pool)
        if self.should_check(arguments):
            self.debug.print_debug(
                "Computing and checking " + self.__class__.__module__, 3)
            values = self._compute_and_check(dataset_pool)
        else:
            values = self.compute(dataset_pool)
        self.number_of_compute_runs += 1
        if self._return_type:
            return self._cast_values(values, arguments)
        return values

    if longlong == int32:
        __long_size = 2**31 - 1
    else:
        __long_size = 2**63 - 1

    _max_storable_value = {
        "bool8": 1,
        "int8": 2**7 - 1,
        "uint8": 2**8 - 1,
        "int16": 2**15 - 1,
        "uint16": 2**16 - 1,
        "int32": 2**31 - 1,
        "uint32": 2**32 - 1,
        "int64": 2**63 - 1,
        "uint64": 2**64 - 1,
        "float32": 3.40282346638528860e+38,
        "float64": 1.79769313486231570e+308,
        "complex64": 3.40282346638528860e+38,
        "complex128": 1.79769313486231570e+308,
        "longlong": __long_size,
    }

    def _cast_values(self, values, arguments):
        """Change the return values to be of type self._return_type.
        If "should_check" is defined, first check for 
        values that are too large for the destination type or
        integer wrap-around."""
        type = values.dtype.str
        if self._return_type == type:
            return values
        if self.should_check(arguments):
            max_value = ma.maximum(values)
            if max_value > self._max_storable_value[self._return_type]:
                max_value_str = str(max_value)
                logger.log_error(
                    "Variable '%s' is being cast to type '%s', but contains a value (%s) too large to fit into that type."
                    % (self.name(), self._return_type, max_value_str))
        return values.astype(self._return_type)

    def _solve_dependencies(self, dataset_pool):
        dataset = self.get_dataset()
        my_dataset_name = dataset.get_dataset_name()
        dependencies_list = self.get_current_dependencies()
        for i in range(len(dependencies_list)):  # compute dependent variables
            dep_item = dependencies_list[i][0]
            if isinstance(dep_item, str):
                depvar_name = VariableName(dep_item)
            else:
                depvar_name = dep_item.get_variable_name(
                )  # dep_item should be an instance of AttributeBox
            dataset_name = depvar_name.get_dataset_name()
            version = dependencies_list[i][1]
            if dataset_name == my_dataset_name:
                ds = dataset
            else:
                ds = dataset_pool.get_dataset(dataset_name)
            (new_versions,
             value) = ds.compute_variables_return_versions_and_final_value(
                 [(depvar_name, version)], dataset_pool)
            self.dependencies_list[i] = (ds._get_attribute_box(depvar_name),
                                         new_versions[0])

    def get_all_dependencies(self):
        """Return all variables and attributes needed to compute this variable.  
        This is returned as a list of tuples where the first element is either AttributeBox or 
        VariableName of the dependent variable and the second element is the version for 
        which this variable was computed.
        """
        def create_fake_dataset(dataset_name):
            storage = StorageFactory().get_storage('dict_storage')

            storage.write_table(table_name='fake_dataset',
                                table_data={'id': array([], dtype='int32')})

            dataset = Dataset(in_storage=storage,
                              in_table_name='fake_dataset',
                              dataset_name=dataset_name,
                              id_name="id")
            return dataset

        result_others = []
        dependencies_list = self.get_current_dependencies()
        for i in range(len(dependencies_list)):
            dep_item = dependencies_list[i][0]
            version = dependencies_list[i][1]
            isprimary = 0
            if isinstance(dep_item, str):
                depvar_name = VariableName(dep_item)
                dataset_name = depvar_name.get_dataset_name()
                var = VariableFactory().get_variable(
                    depvar_name, create_fake_dataset(dataset_name), quiet=True)
                result_others = result_others + [(depvar_name, version)]
            else:  # dep_item should be an instance of AttributeBox
                var = dep_item.get_variable_instance()
                result_others = result_others + [(dep_item, version)]
                isprimary = dep_item.is_primary()

            if (var is not None) and (not isprimary):
                res = var.get_all_dependencies()
                result_others = result_others + res
        return result_others

    def get_dependencies(self):
        """Return variables and attributes needed to compute this variable.  
        This is returned as a list of tuples where the first element is the 
        name of the particular dataset and the second element is the variable 
        name. It does not work through the dependencies tree.
        """
        if has_this_method(self, "dependencies"):
            return self.dependencies()
        return []

    def add_dependencies(self, dep_list=[]):
        """Can be used within 'compute' method to add dependencies. It is performed only 
        when the compute method runs for the first time.
        dep_list can be either a list of character strings or a list of AttributeBoxes."""
        if self.number_of_compute_runs == 0:
            if isinstance(dep_list, str):
                dep_list = [dep_list]
            self.dependencies_list = self.dependencies_list + map(
                lambda x: (x, 0), dep_list)

    def add_and_solve_dependencies(self, dep_list=[], dataset_pool=None):
        """Calls 'add_dependencies' and if it is run for the first time, it also calls the 
        '_solve_dependencies' method."""
        self.add_dependencies(dep_list)
        if self.number_of_compute_runs == 0:
            self._solve_dependencies(dataset_pool)

    def get_current_dependencies(self):
        if self.dependencies_list is None:
            self.dependencies_list = map(lambda x: (x, 0),
                                         self.get_dependencies())
        return self.dependencies_list

    def do_check(self, condition_str, values):
        def condition(x):
            return eval(condition_str)

        # This is a bit ugly, but the upgrade from Python 2.3.5 to
        # Python 2.4 broke backward compatibility in regard to map and
        # numpy's rank-0 arrays. This attempts to detect a rank-0
        # array and convert it into something usable.
        try:
            try:
                len(values)
            except TypeError:
                values = array([values[()]])
        except:
            pass

        count = where(
            array(map(lambda x: not (condition(x)), values)) > 0)[0].size

        if (count > 0):
            logger.log_warning(
                "Variable %s fails %d times on check %s" %
                (self.__class__.__module__, count, condition_str))

    def should_check(self, arguments=None):
        """Return True if this variable should be checked, otherwise False. The information of what
        variables to check is provided in the 'arguments' entry "check_variables". 
        If "check_variables" is missing or is None or is an empty list, do no checks. 
        If "check_variables" is '*', check all variables.
        If "check_variables" is a list containing this variable's name, check this variable. 
        """
        if not isinstance(arguments, Resources):
            return False
        check_variables = arguments.get("check_variables", None)
        if check_variables == None:
            return False
        if (check_variables == '*') or \
           (isinstance(check_variables, list) and (len(check_variables) > 0) and
            (self.__class__.__name__ in check_variables)):
            return True
        return False

    def are_dependent_variables_up_to_date(self, version):
        result = []
        all_dependencies_list = self.get_all_dependencies()
        for variable, version in all_dependencies_list:
            if isinstance(variable, AttributeBox):
                result.append(variable.is_version(version))
            else:  # of type VariableName (means variable wasn't used yet)
                result.append(False)
        return result

    def get_highest_version_of_dependencies(self):
        dependencies_list = self.get_current_dependencies()
        if len(dependencies_list) <= 0:
            return 0
        versions = array(map(lambda x: x[1], dependencies_list))
        return versions.max()

    def set_dataset(self, dataset):
        self.dataset = dataset

    def get_dataset(self):
        return self.dataset

    def safely_divide_two_arrays(self,
                                 numerator,
                                 denominator,
                                 value_for_divide_by_zero=0.0):
        """Returns the result of numerator/denominator with the value_for_divide_by_zero 
        wherever denominator == 0.
        """
        return ma.filled(
            numerator / ma.masked_where(denominator == 0, denominator),
            value_for_divide_by_zero)

    def safely_divide_two_attributes(self,
                                     numerator_name,
                                     denominator_name,
                                     value_for_divide_by_zero=0.0):
        """Returns the result of dividing the numerator_name attribute of this variable
        by the denominator_name attribute of this variable; return the value_for_divide_by_zero 
        wherever denominator == 0.
        """
        numerator = self.get_dataset().get_attribute(numerator_name)
        denominator = self.get_dataset().get_attribute(denominator_name)
        return self.safely_divide_two_arrays(numerator, denominator,
                                             value_for_divide_by_zero)
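# A minimal sketch of a concrete variable following the contract documented in the
# Variable class above: declare dependencies(), implement compute(), and optionally
# request a cast via _return_type. The dataset and attribute names used here
# ('my_dataset', 'number_of_jobs', 'number_of_households') are hypothetical.
class jobs_per_household(Variable):
    _return_type = "float32"

    def dependencies(self):
        # dataset-qualified names of the attributes this variable depends on
        return ["my_dataset.number_of_jobs", "my_dataset.number_of_households"]

    def compute(self, dataset_pool):
        ds = self.get_dataset()
        return self.safely_divide_two_arrays(ds.get_attribute("number_of_jobs"),
                                             ds.get_attribute("number_of_households"))

    def post_check(self, values, dataset_pool):
        # only runs when 'check_variables' selects this variable (see should_check);
        # do_check() logs a warning for every element violating the condition
        self.do_check("x >= 0", values)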
class RegressionModel(ChunkModel):

    model_name = "Regression Model"
    model_short_name = "RM"

    def __init__(self,
                 regression_procedure="opus_core.linear_regression",
                 submodel_string=None,
                 run_config=None,
                 estimate_config=None,
                 debuglevel=0,
                 dataset_pool=None):

        self.debug = DebugPrinter(debuglevel)

        self.dataset_pool = self.create_dataset_pool(dataset_pool)

        self.regression = RegressionModelFactory().get_model(
            name=regression_procedure)
        if self.regression == None:
            raise StandardError, "No regression procedure given."

        self.submodel_string = submodel_string

        self.run_config = run_config
        if self.run_config == None:
            self.run_config = Resources()
        if not isinstance(self.run_config, Resources) and isinstance(
                self.run_config, dict):
            self.run_config = Resources(self.run_config)

        self.estimate_config = estimate_config
        if self.estimate_config == None:
            self.estimate_config = Resources()
        if not isinstance(self.estimate_config, Resources) and isinstance(
                self.estimate_config, dict):
            self.estimate_config = Resources(self.estimate_config)

        self.data = {}
        self.coefficient_names = {}
        ChunkModel.__init__(self)
        self.get_status_for_gui().initialize_pieces(3,
                                                    pieces_description=array([
                                                        'initialization',
                                                        'computing variables',
                                                        'submodel: 1'
                                                    ]))

    def run(self,
            specification,
            coefficients,
            dataset,
            index=None,
            chunk_specification=None,
            data_objects=None,
            run_config=None,
            initial_values=None,
            procedure=None,
            debuglevel=0):
        """'specification' is of type EquationSpecification,
            'coefficients' is of type Coefficients,
            'dataset' is of type Dataset,
            'index' are indices of individuals in dataset for which
                        the model runs. If it is None, the whole dataset is considered.
            'chunk_specification' determines the number of chunks in which the simulation is processed.
            'data_objects' is a dictionary where each key is the name of a data object
            ('zone', ...) and its value is an object of class Dataset.
           'run_config' is of type Resources, it gives additional arguments for the run.
           If 'procedure' is given, it overwrites the regression_procedure of the constructor.
           'initial_values' is an array of the initial values of the results. It will be overwritten
           by the results for those elements that are handled by the model (defined by submodels in the specification).
           By default the results are initialized with 0.
            'debuglevel' overwrites the constructor 'debuglevel'.
        """
        self.debug.flag = debuglevel
        if run_config == None:
            run_config = Resources()
        if not isinstance(run_config, Resources) and isinstance(
                run_config, dict):
            run_config = Resources(run_config)
        self.run_config = run_config.merge_with_defaults(self.run_config)
        self.run_config.merge({"debug": self.debug})
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if procedure is not None:
            self.regression = RegressionModelFactory().get_model(
                name=procedure)
        if initial_values is None:
            self.initial_values = zeros((dataset.size(), ), dtype=float32)
        else:
            self.initial_values = zeros((dataset.size(), ),
                                        dtype=initial_values.dtype)
            self.initial_values[index] = initial_values

        if dataset.size() <= 0:  # no data loaded yet
            dataset.get_id_attribute()
        if index == None:
            index = arange(dataset.size())

        result = ChunkModel.run(self,
                                chunk_specification,
                                dataset,
                                index,
                                float32,
                                specification=specification,
                                coefficients=coefficients)
        return result

    def run_chunk(self, index, dataset, specification, coefficients):
        self.specified_coefficients = SpecifiedCoefficients().create(
            coefficients, specification, neqs=1)
        compute_resources = Resources({"debug": self.debug})
        submodels = self.specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(
            submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(submodels,
                                     self.submodel_string,
                                     dataset,
                                     index,
                                     dataset_pool=self.dataset_pool,
                                     resources=compute_resources)
        variables = self.specified_coefficients.get_full_variable_names_without_constants(
        )
        self.debug.print_debug("Compute variables ...", 4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables,
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)
        data = {}
        coef = {}
        outcome = self.initial_values[index].copy()
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(
                self.specified_coefficients, submodel)
            self.coefficient_names[submodel] = coef[
                submodel].get_coefficient_names_without_constant()[0, :]
            self.debug.print_debug(
                "Compute regression for submodel " + str(submodel), 4)
            self.increment_current_status_piece()
            self.data[submodel] = dataset.create_regression_data(
                coef[submodel],
                index=index[self.observations_mapping[submodel]])
            nan_index = where(isnan(self.data[submodel]))[1]
            inf_index = where(isinf(self.data[submodel]))[1]
            if nan_index.size > 0:
                nan_var_index = unique(nan_index)
                raise ValueError, "NaN(Not A Number) is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % coef[
                    submodel].get_variable_names()[nan_var_index]
            if inf_index.size > 0:
                inf_var_index = unique(inf_index)
                raise ValueError, "Inf is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % coef[
                    submodel].get_variable_names()[inf_var_index]

            if (self.data[submodel].shape[0] >
                    0) and (self.data[submodel].size >
                            0):  # observations for this submodel available
                outcome[self.observations_mapping[submodel]] = \
                    self.regression.run(self.data[submodel], coef[submodel].get_coefficient_values()[0,:],
                        resources=self.run_config).astype(outcome.dtype)
        return outcome

    def correct_infinite_values(self,
                                dataset,
                                outcome_attribute_name,
                                maxvalue=1e+38,
                                clip_all_larger_values=False):
        """Check if the model resulted in infinite values. If yes,
        print warning and clip the values to maxvalue. 
        If clip_all_larger_values is True, all values larger than maxvalue are clipped to maxvalue.
        """
        infidx = where(dataset.get_attribute(outcome_attribute_name) == inf)[0]

        if infidx.size > 0:
            logger.log_warning("Infinite values in %s. Clipped to %s." %
                               (outcome_attribute_name, maxvalue))
            dataset.set_values_of_one_attribute(outcome_attribute_name,
                                                maxvalue, infidx)
        if clip_all_larger_values:
            idx = where(
                dataset.get_attribute(outcome_attribute_name) > maxvalue)[0]
            if idx.size > 0:
                logger.log_warning(
                    "Values in %s larger than %s. Clipped to %s." %
                    (outcome_attribute_name, maxvalue, maxvalue))
                dataset.set_values_of_one_attribute(outcome_attribute_name,
                                                    maxvalue, idx)

    def estimate(self,
                 specification,
                 dataset,
                 outcome_attribute,
                 index=None,
                 procedure=None,
                 data_objects=None,
                 estimate_config=None,
                 debuglevel=0):
        """'specification' is of type EquationSpecification,
            'dataset' is of type Dataset,
            'outcome_attribute' - string that determines the dependent variable,
            'index' are indices of individuals in dataset for which
                    the model runs. If it is None, the whole dataset is considered.
            'procedure' - name of the estimation procedure. If it is None,
                there should be an entry "estimation" in 'estimate_config' that determines the procedure. The class
                must have a method 'run' that takes as arguments 'data', 'regression_procedure' and 'resources'.
                It returns a dictionary with entries 'estimators', 'standard_errors' and 't_values' (all 1D numpy arrays).
            'data_objects' is a dictionary where each key is the name of a data object
                    ('zone', ...) and its value is an object of class Dataset.
            'estimate_config' is of type Resources, it gives additional arguments for the estimation procedure.
            'debuglevel' overwrites the class 'debuglevel'.
        """
        #import wingdbstub
        self.debug.flag = debuglevel
        if estimate_config == None:
            estimate_config = Resources()
        if not isinstance(estimate_config, Resources) and isinstance(
                estimate_config, dict):
            estimate_config = Resources(estimate_config)
        self.estimate_config = estimate_config.merge_with_defaults(
            self.estimate_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.procedure = procedure
        if self.procedure == None:
            self.procedure = self.estimate_config.get("estimation", None)
        if self.procedure is not None:
            self.procedure = ModelComponentCreator().get_model_component(
                self.procedure)
        else:
            logger.log_warning(
                "No estimation procedure given, or problems with loading the corresponding module."
            )

        compute_resources = Resources({"debug": self.debug})
        if dataset.size() <= 0:  # no data loaded yet
            dataset.get_id_attribute()
        if index == None:
            index = arange(dataset.size())
        if not isinstance(index, ndarray):
            index = array(index)

        estimation_size_agents = self.estimate_config.get(
            "estimation_size_agents",
            None)  # should be a proportion of the agent_set
        if estimation_size_agents == None:
            estimation_size_agents = 1.0
        else:
            estimation_size_agents = max(min(estimation_size_agents, 1.0),
                                         0.0)  # between 0 and 1

        if estimation_size_agents < 1.0:
            self.debug.print_debug("Sampling agents for estimation ...", 3)
            estimation_idx = sample_noreplace(
                arange(index.size), int(index.size * estimation_size_agents))
        else:
            estimation_idx = arange(index.size)

        estimation_idx = index[estimation_idx]
        self.debug.print_debug(
            "Number of observations for estimation: " +
            str(estimation_idx.size), 2)
        if estimation_idx.size <= 0:
            self.debug.print_debug("Nothing to be done.", 2)
            return (None, None)

        coefficients = create_coefficient_from_specification(specification)
        specified_coefficients = SpecifiedCoefficients().create(coefficients,
                                                                specification,
                                                                neqs=1)
        submodels = specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(
            submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(
            submodels,
            self.submodel_string,
            dataset,
            estimation_idx,
            dataset_pool=self.dataset_pool,
            resources=compute_resources,
            submodel_size_max=self.estimate_config.get('submodel_size_max',
                                                       None))
        variables = specified_coefficients.get_full_variable_names_without_constants(
        )
        self.debug.print_debug("Compute variables ...", 4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables,
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)

        coef = {}
        estimated_coef = {}
        self.outcome = {}
        dataset.compute_variables([outcome_attribute],
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)
        regression_resources = Resources(estimate_config)
        regression_resources.merge({"debug": self.debug})
        outcome_variable_name = VariableName(outcome_attribute)
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(
                specified_coefficients, submodel)
            self.increment_current_status_piece()
            logger.log_status("Estimate regression for submodel " +
                              str(submodel),
                              tags=["estimate"],
                              verbosity_level=2)
            logger.log_status("Number of observations: " +
                              str(self.observations_mapping[submodel].size),
                              tags=["estimate"],
                              verbosity_level=2)
            self.data[
                submodel] = dataset.create_regression_data_for_estimation(
                    coef[submodel],
                    index=estimation_idx[self.observations_mapping[submodel]])
            self.coefficient_names[submodel] = coef[
                submodel].get_coefficient_names_without_constant()[0, :]
            if (self.data[submodel].shape[0] > 0
                ) and (self.data[submodel].size > 0) and (
                    self.procedure
                    is not None):  # observations for this submodel available
                self.outcome[submodel] = dataset.get_attribute_by_index(
                    outcome_variable_name.get_alias(),
                    estimation_idx[self.observations_mapping[submodel]])
                regression_resources.merge({"outcome": self.outcome[submodel]})
                regression_resources.merge({
                    "coefficient_names":
                    self.coefficient_names[submodel].tolist(),
                    "constant_position":
                    coef[submodel].get_constants_positions()
                })
                estimated_coef[submodel] = self.procedure.run(
                    self.data[submodel],
                    self.regression,
                    resources=regression_resources)
                if "estimators" in estimated_coef[submodel].keys():
                    coef[submodel].set_coefficient_values(
                        estimated_coef[submodel]["estimators"])
                if "standard_errors" in estimated_coef[submodel].keys():
                    coef[submodel].set_standard_errors(
                        estimated_coef[submodel]["standard_errors"])
                if "other_measures" in estimated_coef[submodel].keys():
                    for measure in estimated_coef[submodel][
                            "other_measures"].keys():
                        coef[submodel].set_measure(
                            measure, estimated_coef[submodel]["other_measures"]
                            [measure])
                if "other_info" in estimated_coef[submodel].keys():
                    for info in estimated_coef[submodel]["other_info"]:
                        coef[submodel].set_other_info(
                            info, estimated_coef[submodel]["other_info"][info])
        coefficients.fill_coefficients(coef)

        self.save_predicted_values_and_errors(specification,
                                              coefficients,
                                              dataset,
                                              outcome_variable_name,
                                              index=index,
                                              data_objects=data_objects)

        return (coefficients, estimated_coef)

    def prepare_for_run(self,
                        dataset=None,
                        dataset_filter=None,
                        filter_threshold=0,
                        **kwargs):
        spec, coef = prepare_specification_and_coefficients(**kwargs)
        if (dataset is not None) and (dataset_filter is not None):
            filter_values = dataset.compute_variables(
                [dataset_filter], dataset_pool=self.dataset_pool)
            index = where(filter_values > filter_threshold)[0]
        else:
            index = None
        return (spec, coef, index)

    def prepare_for_estimate(self,
                             dataset=None,
                             dataset_filter=None,
                             filter_threshold=0,
                             **kwargs):
        spec = get_specification_for_estimation(**kwargs)
        if (dataset is not None) and (dataset_filter is not None):
            filter_values = dataset.compute_variables(
                [dataset_filter], dataset_pool=self.dataset_pool)
            index = where(filter_values > filter_threshold)[0]
        else:
            index = None
        return (spec, index)

    def get_data_as_dataset(self, submodel=-2):
        """Like get_all_data, but the retuning value is a Dataset containing attributes that
        correspond to the data columns. Their names are coefficient names."""
        all_data = self.get_all_data(submodel)
        if all_data is None:
            return None
        names = self.get_coefficient_names(submodel)
        if names is None:
            return None
        dataset_data = {}
        for i in range(names.size):
            dataset_data[names[i]] = all_data[:, i].reshape(all_data.shape[0])
        dataset_data["id"] = arange(all_data.shape[0]) + 1
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name='dataset', table_data=dataset_data)
        ds = Dataset(in_storage=storage, id_name="id", in_table_name='dataset')
        return ds

    def save_predicted_values_and_errors(self,
                                         specification,
                                         coefficients,
                                         dataset,
                                         outcome_variable,
                                         index=None,
                                         data_objects=None):
        if self.estimate_config.get('save_predicted_values_and_errors', False):
            logger.log_status('Computing predicted values and residuals.')
            original_values = dataset.get_attribute_by_index(
                outcome_variable, index)
            predicted_values = zeros(dataset.size(), dtype='float32')
            predicted_values[index] = self.run_after_estimation(
                specification,
                coefficients,
                dataset,
                index=index,
                data_objects=data_objects)
            predicted_attribute_name = 'predicted_%s' % outcome_variable.get_alias(
            )
            dataset.add_primary_attribute(name=predicted_attribute_name,
                                          data=predicted_values)
            dataset.flush_attribute(predicted_attribute_name)
            predicted_error_attribute_name = 'residuals_%s' % outcome_variable.get_alias(
            )
            error_values = zeros(dataset.size(), dtype='float32')
            error_values[index] = (original_values -
                                   predicted_values[index]).astype(
                                       error_values.dtype)
            dataset.add_primary_attribute(name=predicted_error_attribute_name,
                                          data=error_values)
            dataset.flush_attribute(predicted_error_attribute_name)
            logger.log_status(
                'Predicted values saved as %s (for the %s dataset)' %
                (predicted_attribute_name, dataset.get_dataset_name()))
            logger.log_status(
                'Residuals saved as %s (for the %s dataset)' %
                (predicted_error_attribute_name, dataset.get_dataset_name()))

    def export_estimation_data(self,
                               submodel=-2,
                               file_name='./estimation_data_regression.txt',
                               delimiter='\t'):
        import os
        from numpy import newaxis
        data = concatenate((self.outcome[submodel][..., newaxis],
                            self.get_all_data(submodel=submodel)),
                           axis=1)
        header = ['outcome'] + self.get_coefficient_names(submodel).tolist()
        nrows = data.shape[0]
        file_name_root, file_name_ext = os.path.splitext(file_name)
        out_file = "%s_submodel_%s.txt" % (file_name_root, submodel)
        fh = open(out_file, 'w')
        fh.write(delimiter.join(header) + '\n')  #file header
        for row in range(nrows):
            line = [str(x) for x in data[row, ]]
            fh.write(delimiter.join(line) + '\n')
        fh.flush()
        fh.close()
        print 'Data written into %s' % out_file

    def run_after_estimation(self, *args, **kwargs):
        return self.run(*args, **kwargs)

    def _get_status_total_pieces(self):
        return ChunkModel._get_status_total_pieces(
            self) * self.get_status_for_gui().get_total_number_of_pieces()

    def _get_status_current_piece(self):
        return ChunkModel._get_status_current_piece(
            self) * self.get_status_for_gui().get_total_number_of_pieces(
            ) + self.get_status_for_gui().get_current_piece()

    def _get_status_piece_description(self):
        return "%s %s" % (ChunkModel._get_status_piece_description(
            self), self.get_status_for_gui().get_current_piece_description())

    def get_specified_coefficients(self):
        return self.specified_coefficients
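# A hedged usage sketch for the RegressionModel above. It assumes that
# 'specification' (EquationSpecification), 'coefficients' (Coefficients) and
# 'households' (a Dataset with an observed attribute named 'income') already
# exist, e.g. loaded from storage elsewhere; only methods defined in the class
# above are used, with their default chunking and configuration.
model = RegressionModel(regression_procedure="opus_core.linear_regression",
                        submodel_string=None, debuglevel=0)
# simulation: returns one float32 value per member of 'households'
predicted = model.run(specification, coefficients, households)
# estimation: fits the coefficients against the observed outcome attribute
coefficients, estimation_results = model.estimate(specification, households,
                                                  outcome_attribute="income")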
class RegressionModel(ChunkModel):

    model_name = "Regression Model"
    model_short_name = "RM"

    def __init__(self, regression_procedure="opus_core.linear_regression",
                  submodel_string=None,
                  run_config=None, estimate_config=None, debuglevel=0, dataset_pool=None):
 
        self.debug = DebugPrinter(debuglevel)

        self.dataset_pool = self.create_dataset_pool(dataset_pool)

        self.regression = RegressionModelFactory().get_model(name=regression_procedure)
        if self.regression == None:
            raise StandardError, "No regression procedure given."

        self.submodel_string = submodel_string

        self.run_config = run_config
        if self.run_config == None:
            self.run_config = Resources()
        if not isinstance(self.run_config,Resources) and isinstance(self.run_config, dict):
            self.run_config = Resources(self.run_config)

        self.estimate_config = estimate_config
        if self.estimate_config == None:
            self.estimate_config = Resources()
        if not isinstance(self.estimate_config,Resources) and isinstance(self.estimate_config, dict):
            self.estimate_config = Resources(self.estimate_config)
            
        self.data = {}
        self.coefficient_names = {}
        ChunkModel.__init__(self)
        self.get_status_for_gui().initialize_pieces(3, pieces_description = array(['initialization', 'computing variables', 'submodel: 1']))

    def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
            data_objects=None, run_config=None, initial_values=None, procedure=None, debuglevel=0):
        """'specification' is of type EquationSpecification,
            'coefficients' is of type Coefficients,
            'dataset' is of type Dataset,
            'index' are indices of individuals in dataset for which
                        the model runs. If it is None, the whole dataset is considered.
            'chunk_specification' determines the number of chunks in which the simulation is processed.
            'data_objects' is a dictionary where each key is the name of a data object
            ('zone', ...) and its value is an object of class Dataset.
           'run_config' is of type Resources, it gives additional arguments for the run.
           If 'procedure' is given, it overwrites the regression_procedure of the constructor.
           'initial_values' is an array of the initial values of the results. It will be overwritten
           by the results for those elements that are handled by the model (defined by submodels in the specification).
           By default the results are initialized with 0.
            'debuglevel' overwrites the constructor 'debuglevel'.
        """
        self.debug.flag = debuglevel
        if run_config == None:
            run_config = Resources()
        if not isinstance(run_config,Resources) and isinstance(run_config, dict):
            run_config = Resources(run_config)
        self.run_config = run_config.merge_with_defaults(self.run_config)
        self.run_config.merge({"debug":self.debug})
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.replace_dataset(dataset.get_dataset_name(), dataset)
        if procedure is not None: 
            self.regression = RegressionModelFactory().get_model(name=procedure)
        if initial_values is None:
            self.initial_values = zeros((dataset.size(),), dtype=float32)
        else:
            self.initial_values = zeros((dataset.size(),), dtype=initial_values.dtype)
            self.initial_values[index] = initial_values
            
        if dataset.size()<=0: # no data loaded yet
            dataset.get_id_attribute()
        if index == None:
            index = arange(dataset.size())
            
        result = ChunkModel.run(self, chunk_specification, dataset, index, float32,
                                 specification=specification, coefficients=coefficients)
        return result

    def run_chunk (self, index, dataset, specification, coefficients):
        self.specified_coefficients = SpecifiedCoefficients().create(coefficients, specification, neqs=1)
        compute_resources = Resources({"debug":self.debug})
        submodels = self.specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(submodels, self.submodel_string, dataset, index,
                                      dataset_pool=self.dataset_pool, resources = compute_resources)
        variables = self.specified_coefficients.get_full_variable_names_without_constants()
        self.debug.print_debug("Compute variables ...",4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables, dataset_pool = self.dataset_pool, resources = compute_resources)
        data = {}
        coef = {}
        outcome=self.initial_values[index].copy()
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(self.specified_coefficients,submodel)
            self.coefficient_names[submodel] = coef[submodel].get_coefficient_names_without_constant()[0,:]
            self.debug.print_debug("Compute regression for submodel " +str(submodel),4)
            self.increment_current_status_piece()
            self.data[submodel] = dataset.create_regression_data(coef[submodel],
                                                                index = index[self.observations_mapping[submodel]])
            nan_index = where(isnan(self.data[submodel]))[1]
            inf_index = where(isinf(self.data[submodel]))[1]
            vnames = asarray(coef[submodel].get_variable_names())
            if nan_index.size > 0:
                nan_var_index = unique(nan_index)
                self.data[submodel] = nan_to_num(self.data[submodel])
                logger.log_warning("NaN(Not A Number) is returned from variable %s; it is replaced with %s." % (vnames[nan_var_index], nan_to_num(nan)))
                #raise ValueError, "NaN(Not A Number) is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[nan_var_index]
            if inf_index.size > 0:
                inf_var_index = unique(inf_index)
                self.data[submodel] = nan_to_num(self.data[submodel])
                logger.log_warning("Inf is returned from variable %s; it is replaced with %s." % (vnames[inf_var_index], nan_to_num(inf)))
                #raise ValueError, "Inf is returned from variable %s; check the model specification table and/or attribute values used in the computation for the variable." % vnames[inf_var_index]
            
            if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0): # observations for this submodel available
                outcome[self.observations_mapping[submodel]] = \
                    self.regression.run(self.data[submodel], coef[submodel].get_coefficient_values()[0,:],
                        resources=self.run_config).astype(outcome.dtype)
        return outcome

    def correct_infinite_values(self, dataset, outcome_attribute_name, maxvalue=1e+38, clip_all_larger_values=False):
        """Check if the model resulted in infinite values. If yes,
        print warning and clip the values to maxvalue. 
        If clip_all_larger_values is True, all values larger than maxvalue are clipped to maxvalue.
        """
        infidx = where(dataset.get_attribute(outcome_attribute_name) == inf)[0]

        if infidx.size > 0:
            logger.log_warning("Infinite values in %s. Clipped to %s." % (outcome_attribute_name, maxvalue))
            dataset.set_values_of_one_attribute(outcome_attribute_name, maxvalue, infidx)
        if clip_all_larger_values:
            idx = where(dataset.get_attribute(outcome_attribute_name) > maxvalue)[0]
            if idx.size > 0:
                logger.log_warning("Values in %s larger than %s. Clipped to %s." % (outcome_attribute_name, maxvalue, maxvalue))
                dataset.set_values_of_one_attribute(outcome_attribute_name, maxvalue, idx)
            
    def estimate(self, specification, dataset, outcome_attribute, index = None, procedure=None, data_objects=None,
                        estimate_config=None,  debuglevel=0):
        """'specification' is of type EquationSpecification,
            'dataset' is of type Dataset,
            'outcome_attribute' - string that determines the dependent variable,
            'index' are indices of individuals in dataset for which
                    the model runs. If it is None, the whole dataset is considered.
            'procedure' - name of the estimation procedure. If it is None,
                there should be an entry "estimation" in 'estimate_config' that determines the procedure. The class
                must have a method 'run' that takes as arguments 'data', 'regression_procedure' and 'resources'.
                It returns a dictionary with entries 'estimators', 'standard_errors' and 't_values' (all 1D numpy arrays).
            'data_objects' is a dictionary where each key is the name of a data object
                    ('zone', ...) and its value is an object of class Dataset.
            'estimate_config' is of type Resources, it gives additional arguments for the estimation procedure.
            'debuglevel' overwrites the class 'debuglevel'.
        """
        #import wingdbstub
        self.debug.flag = debuglevel
        if estimate_config == None:
            estimate_config = Resources()
        if not isinstance(estimate_config,Resources) and isinstance(estimate_config, dict):
            estimate_config = Resources(estimate_config)
        self.estimate_config = estimate_config.merge_with_defaults(self.estimate_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.procedure=procedure
        if self.procedure == None:
            self.procedure = self.estimate_config.get("estimation", None)
        if self.procedure is not None:
            self.procedure = ModelComponentCreator().get_model_component(self.procedure)
        else:
            logger.log_warning("No estimation procedure given, or problems with loading the corresponding module.")

        compute_resources = Resources({"debug":self.debug})
        if dataset.size()<=0: # no data loaded yet
            dataset.get_id_attribute()
        if index == None:
            index = arange(dataset.size())
        if not isinstance(index,ndarray):
            index=array(index)

        estimation_size_agents = self.estimate_config.get("estimation_size_agents", None) # should be a proportion of the agent_set
        if estimation_size_agents == None:
            estimation_size_agents = 1.0
        else:
            estimation_size_agents = max(min(estimation_size_agents,1.0),0.0) # between 0 and 1

        if estimation_size_agents < 1.0:
            self.debug.print_debug("Sampling agents for estimation ...",3)
            estimation_idx = sample_noreplace(arange(index.size),
                                                         int(index.size*estimation_size_agents))
        else:
            estimation_idx = arange(index.size)

        estimation_idx = index[estimation_idx]
        self.debug.print_debug("Number of observations for estimation: " + str(estimation_idx.size),2)
        if estimation_idx.size <= 0:
            self.debug.print_debug("Nothing to be done.",2)
            return (None, None)

        coefficients = create_coefficient_from_specification(specification)
        self.specified_coefficients = SpecifiedCoefficients().create(coefficients, specification, neqs=1)
        submodels = self.specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(submodels, self.submodel_string, dataset, estimation_idx,
                                      dataset_pool=self.dataset_pool, resources = compute_resources,
                                      submodel_size_max=self.estimate_config.get('submodel_size_max', None))
        variables = self.specified_coefficients.get_full_variable_names_without_constants()
        self.debug.print_debug("Compute variables ...",4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables, dataset_pool=self.dataset_pool, resources = compute_resources)

        coef = {}
        estimated_coef={}
        self.outcome = {}
        dataset.compute_variables([outcome_attribute], dataset_pool=self.dataset_pool, resources=compute_resources)
        regression_resources=Resources(estimate_config)
        regression_resources.merge({"debug":self.debug})
        outcome_variable_name = VariableName(outcome_attribute)
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(self.specified_coefficients,submodel)
            self.increment_current_status_piece()
            logger.log_status("Estimate regression for submodel " +str(submodel),
                               tags=["estimate"], verbosity_level=2)
            #logger.log_status("Number of observations: " +str(self.observations_mapping[submodel].size),
                               #tags=["estimate"], verbosity_level=2)
            self.data[submodel] = dataset.create_regression_data_for_estimation(coef[submodel],
                                                            index = estimation_idx[self.observations_mapping[submodel]])
            self.coefficient_names[submodel] = coef[submodel].get_coefficient_names_without_constant()[0,:]
            if (self.data[submodel].shape[0] > 0) and (self.data[submodel].size > 0) and (self.procedure is not None): # observations for this submodel available
                self.outcome[submodel] = dataset.get_attribute_by_index(outcome_variable_name.get_alias(), estimation_idx[self.observations_mapping[submodel]])   
                regression_resources.merge({"outcome":  self.outcome[submodel]})
                regression_resources.merge({"coefficient_names":self.coefficient_names[submodel].tolist(),
                            "constant_position": coef[submodel].get_constants_positions()})
                regression_resources.merge({"submodel": submodel})
                estimated_coef[submodel] = self.procedure.run(self.data[submodel], self.regression,
                                                        resources=regression_resources)
                if "estimators" in estimated_coef[submodel].keys():
                    coef[submodel].set_coefficient_values(estimated_coef[submodel]["estimators"])
                if "standard_errors" in estimated_coef[submodel].keys():
                    coef[submodel].set_standard_errors(estimated_coef[submodel]["standard_errors"])
                if "other_measures" in estimated_coef[submodel].keys():
                    for measure in estimated_coef[submodel]["other_measures"].keys():
                        coef[submodel].set_measure(measure,
                              estimated_coef[submodel]["other_measures"][measure])
                if "other_info" in estimated_coef[submodel].keys():
                    for info in estimated_coef[submodel]["other_info"]:
                        coef[submodel].set_other_info(info,
                              estimated_coef[submodel]["other_info"][info])
        coefficients.fill_coefficients(coef)
        self.specified_coefficients.coefficients = coefficients
        self.save_predicted_values_and_errors(specification, coefficients, dataset, outcome_variable_name, index=index, data_objects=data_objects)
            
        return (coefficients, estimated_coef)
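
    # A minimal usage sketch, not from the original source: dataset, attribute and
    # procedure names below are hypothetical, and 'my_model' stands for an instance of
    # this regression model class.
    #
    #   specification = EquationSpecification(variables=("urbansim.gridcell.population",),
    #                                         coefficients=("b_pop",))
    #   coefficients, details = my_model.estimate(specification, gridcell_dataset,
    #                                             outcome_attribute="urbansim.gridcell.ln_total_land_value",
    #                                             procedure="opus_core.estimate_linear_regression")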

    def prepare_for_run(self, dataset=None, dataset_filter=None, filter_threshold=0, **kwargs):
        spec, coef = prepare_specification_and_coefficients(**kwargs)
        if (dataset is not None) and (dataset_filter is not None):
            filter_values = dataset.compute_variables([dataset_filter], dataset_pool=self.dataset_pool)
            index = where(filter_values > filter_threshold)[0]
        else:
            index = None
        return (spec, coef, index)

    def prepare_for_estimate(self, dataset=None, dataset_filter=None, filter_threshold=0, **kwargs):
        spec = get_specification_for_estimation(**kwargs)
        if (dataset is not None) and (dataset_filter is not None):
            filter_values = dataset.compute_variables([dataset_filter], dataset_pool=self.dataset_pool)
            index = where(filter_values > filter_threshold)[0]
        else:
            index = None
        return (spec, index)
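
    # Hedged sketch of the two preparation helpers (all names are illustrative; the extra
    # keyword arguments are passed through to the specification/coefficient loaders):
    #
    #   spec, coef, index = my_model.prepare_for_run(dataset=gridcell_dataset,
    #                                                dataset_filter="urbansim.gridcell.is_developable",
    #                                                **loader_kwargs)
    #   spec, index = my_model.prepare_for_estimate(dataset=gridcell_dataset,
    #                                               dataset_filter="urbansim.gridcell.is_developable",
    #                                               **loader_kwargs)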
    
    def get_data_as_dataset(self, submodel=-2):
        """Like get_all_data, but the retuning value is a Dataset containing attributes that
        correspond to the data columns. Their names are coefficient names."""
        all_data = self.get_all_data(submodel)
        if all_data is None:
            return None
        names = self.get_coefficient_names(submodel)
        if names is None:
            return None
        dataset_data = {}
        for i in range(names.size):
            dataset_data[names[i]] = all_data[:, i].reshape(all_data.shape[0])
        dataset_data["id"] = arange(all_data.shape[0])+1
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name='dataset', table_data=dataset_data)
        ds = Dataset(in_storage=storage, id_name="id", in_table_name='dataset')
        return ds
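
    # Illustrative use (assumes estimate() has already populated the per-submodel data;
    # the submodel id is hypothetical):
    #
    #   ds = my_model.get_data_as_dataset(submodel=-2)
    #   if ds is not None:
    #       print ds.get_attribute_names()   # one attribute per coefficient, plus 'id'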

    def save_predicted_values_and_errors(self, specification, coefficients, dataset, outcome_variable, index=None, data_objects=None):
        if self.estimate_config.get('save_predicted_values_and_errors', False):
            logger.log_status('Computing predicted values and residuals.')
            original_values = dataset.get_attribute_by_index(outcome_variable, index)
            predicted_values = zeros(dataset.size(), dtype='float32')
            predicted_values[index] = self.run_after_estimation(specification, coefficients, dataset, index=index, data_objects=data_objects)
            predicted_attribute_name = 'predicted_%s' % outcome_variable.get_alias()
            dataset.add_primary_attribute(name=predicted_attribute_name, data=predicted_values)
            dataset.flush_attribute(predicted_attribute_name)
            predicted_error_attribute_name = 'residuals_%s' % outcome_variable.get_alias()
            error_values = zeros(dataset.size(), dtype='float32')
            error_values[index] = (original_values - predicted_values[index]).astype(error_values.dtype)
            dataset.add_primary_attribute(name=predicted_error_attribute_name, data = error_values)
            dataset.flush_attribute(predicted_error_attribute_name)
            logger.log_status('Predicted values saved as %s (for the %s dataset)' % (predicted_attribute_name, dataset.get_dataset_name()))
            logger.log_status('Residuals saved as %s (for the %s dataset)' % (predicted_error_attribute_name, dataset.get_dataset_name()))
        
    def export_estimation_data(self, submodel=-2, file_name='./estimation_data_regression.txt', delimiter = '\t'):
        import os
        from numpy import newaxis
        data = concatenate((self.outcome[submodel][...,newaxis], self.get_all_data(submodel=submodel)), axis=1)
        header = ['outcome'] + self.get_coefficient_names(submodel).tolist()
        nrows = data.shape[0]
        file_name_root, file_name_ext = os.path.splitext(file_name)
        out_file = "%s_submodel_%s.txt" % (file_name_root, submodel)
        fh = open(out_file, 'w')
        fh.write(delimiter.join(header) + '\n')   # file header
        for row in range(nrows):
            line = [str(x) for x in data[row,]]
            fh.write(delimiter.join(line) + '\n')
        fh.flush()
        fh.close()
        print 'Data written into %s' % out_file
        
    def run_after_estimation(self, *args, **kwargs):
        return self.run(*args, **kwargs)
            
    def _get_status_total_pieces(self):
        return ChunkModel._get_status_total_pieces(self) * self.get_status_for_gui().get_total_number_of_pieces()
    
    def _get_status_current_piece(self):
        return ChunkModel._get_status_current_piece(self)*self.get_status_for_gui().get_total_number_of_pieces() + self.get_status_for_gui().get_current_piece()
        
    def _get_status_piece_description(self):
        return "%s %s" % (ChunkModel._get_status_piece_description(self), self.get_status_for_gui().get_current_piece_description())
    
    def get_specified_coefficients(self):
        return self.specified_coefficients
Example #41
0
class upc_sequence(object):
    """
        Invokes computation of utilities, probabilities and choices.
    """
    def __init__(self,
                 utility_class=None,
                 probability_class=None,
                 choice_class=None,
                 resources=None,
                 debuglevel=0):
        """utility_class, probability_class, choice_class are objects of the corresponding classes.
            They must have a method 'run'.
        """
        self.utility_class = utility_class
        self.probability_class = probability_class
        self.choice_class = choice_class
        self.resources = resources
        if self.resources is None:
            self.resources = Resources()
        self.utilities = None
        self.probabilities = None
        self.choices = None
        self.debug = DebugPrinter(debuglevel)

    def run(self, data=None, coefficients=None, resources=None):
        local_resources = Resources()
        if resources:
            local_resources.merge(resources)
        last_result = self.compute_utilities(data=data,
                                             coefficients=coefficients,
                                             resources=local_resources)
        this_result = self.compute_probabilities(resources=local_resources)
        if this_result is not None:
            last_result = this_result
        this_result = self.compute_choices(resources=local_resources)
        if this_result is not None:
            last_result = this_result
        return last_result
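
    # A hedged wiring example: the three components are placeholders for any objects
    # exposing a 'run' method with the signatures used above.
    #
    #   upc = upc_sequence(utility_class=my_utilities,
    #                      probability_class=my_probabilities,
    #                      choice_class=my_choices)
    #   choices = upc.run(data=data_array, coefficients=coef_array)
    #   upc.summary()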

    def compute_utilities(self, data=None, coefficients=None, resources=None):
        if self.utility_class is None:
            self.debug.print_debug("No utilities class given.", 10)
            return None
        self.debug.print_debug("compute_utilities ...", 3)
        self.utilities = self.utility_class.run(data,
                                                coefficients,
                                                resources=resources)
        return self.utilities

    def compute_probabilities(self, resources=None):
        if self.probability_class is None:
            self.debug.print_debug("No probabilities class given.", 10)
            return None
        self.debug.print_debug("compute_probabilities ...", 3)
        self.probabilities = self.probability_class.run(self.utilities,
                                                        resources=resources)
        return self.probabilities

    def compute_choices(self, resources=None):
        if self.choice_class is None:
            self.debug.print_debug("No choices class given.", 10)
            return None
        self.debug.print_debug("compute_choices ...", 3)
        self.choices = self.choice_class.run(self.probabilities,
                                             resources=resources)
        return self.choices

    def get_utilities(self):
        return self.utilities

    def get_probabilities(self):
        return self.probabilities

    def write_probability_sums(self):
        self.probability_class.check_sum(self.probabilities)

    def get_choices(self):
        return self.choices

    def get_choice_histogram(self, min=None, max=None, bins=None):
        """Give an array that represents a histogram of choices."""
        if max is None:
            max = self.choices.max() + 1
        if min is None:
            min = self.choices.min()
        if bins is None:
            bins = max - min
        return histogram(self.get_choices(), min, max, bins)

    def get_probabilities_sum(self):
        """Return probabilities sum along the first axis.
        """
        probs = self.get_probabilities()
        if probs.ndim < 2:
            return probs.sum()
        return reshape(sum(probs, 0), probs.shape[1])

    def plot_choice_histograms(self, capacity, main=""):
        self.plot_histogram(numrows=2)
        self.plot_histogram_with_capacity(capacity)

    def plot_histogram(self, main="", numrows=1, numcols=1, fignum=1):
        """Plot a histogram of choices and probability sums. Expects probabilities as (at least) a 2D array.
        """
        from matplotlib.pylab import bar, xticks, yticks, title, text, axis, figure, subplot

        probabilities = self.get_probabilities()
        if probabilities.ndim < 2:
            raise StandardError, "probabilities must have at least 2 dimensions."
        alts = probabilities.shape[1]
        width_par = (1 / alts + 1) / 2.0
        choice_counts = self.get_choice_histogram(0, alts)
        sum_probs = self.get_probabilities_sum()

        subplot(numrows, numcols, fignum)
        bar(arange(alts), choice_counts, width=width_par)
        bar(arange(alts) + width_par, sum_probs, width=width_par, color='g')
        xticks(arange(alts))
        title(main)
        Axis = axis()
        text(alts + .5,
             -.1,
             "\nchoices histogram (blue),\nprobabilities sum (green)",
             horizontalalignment='right',
             verticalalignment='top')

    def plot_histogram_with_capacity(self, capacity, main=""):
        """Plot histogram of choices and capacities. The number of alternatives is determined
        from the second dimension of probabilities.
        """
        from matplotlib.pylab import bar, xticks, yticks, title, text, axis, figure, subplot

        probabilities = self.get_probabilities()
        if probabilities.ndim < 2:
            raise StandardError, "probabilities must have at least 2 dimensions."
        alts = self.probabilities.shape[1]
        width_par = (1 / alts + 1) / 2.0
        choice_counts = self.get_choice_histogram(0, alts)
        sum_probs = self.get_probabilities_sum()

        subplot(212)
        bar(arange(alts), choice_counts, width=width_par)
        bar(arange(alts) + width_par, capacity, width=width_par, color='r')
        xticks(arange(alts))
        title(main)
        Axis = axis()
        text(alts + .5,
             -.1,
             "\nchoices histogram (blue),\ncapacities (red)",
             horizontalalignment='right',
             verticalalignment='top')

    def show_plots(self, file=None):
        """Render the plots that have been generated.
        This method should be the last method called in the script, since it hands control to
        matplotlib's rendering backend.
        """
        from matplotlib.pylab import show, savefig
        if file is not None:
            savefig(file)
        else:
            show()

    def summary(self):
        logger.log_status("utilities")
        logger.log_status(self.get_utilities())
        logger.log_status("probabilities")
        logger.log_status(self.get_probabilities())
        logger.log_status("probabilities sums")
        self.write_probability_sums()
        logger.log_status("choices")
        logger.log_status(self.get_choices())

    def get_excess_demand(self, capacity):
        demand = self.get_probabilities_sum()
        return where(demand > capacity, demand - capacity, 0)

    def get_dependent_datasets(self):
        result = []
        if self.utility_class is not None:
            try:
                result = result + self.utility_class.get_dependent_datasets()
            except:
                pass
        if self.probability_class is not None:
            try:
                result = result + self.probability_class.get_dependent_datasets(
                )
            except:
                pass
        if self.choice_class is not None:
            try:
                result = result + self.choice_class.get_dependent_datasets()
            except:
                pass
        return result
    def get_variable(self,
                     variable_name,
                     dataset,
                     quiet=False,
                     debug=0,
                     index_name=None):
        """Returns an instance of class Variable. 
        'variable_name' is an instance of class VariableName. 
        'dataset' is an object of class Dataset to which the variable belongs to. 
        In case of an error in either importing the module or evaluating its constructor, 
        the method returns None.
        If quiet is True no warnings are printed.
        index_name is used for lag variables only.
        """
        lag_attribute_name = None
        lag_offset = 0

        if not isinstance(debug, DebugPrinter):
            debug = DebugPrinter(debug)

        if variable_name.get_autogen_class() is not None:
            # variable_name has an autogenerated class -- just use that
            variable_subclass = variable_name.get_autogen_class()
            substrings = ()
        else:
            # either find the variable name in the expression library (if present), in an appropriate 'aliases' file,
            # or load our variable class as 'variable_subclass' using an import statement
            short_name = variable_name.get_short_name()
            dataset_name = variable_name.get_dataset_name()
            package_name = variable_name.get_package_name()
            # if there isn't a package name, first look in the expression library (if there is a package name, look elsewhere)
            if package_name is None:
                e = VariableFactory._expression_library.get(
                    (dataset_name, short_name), None)
                if e is not None:
                    if e == variable_name.get_expression(
                    ):  # it is a primary attribute
                        return None
                    v = VariableName(e)
                    return VariableFactory().get_variable(v,
                                                          dataset,
                                                          quiet=quiet,
                                                          debug=debug)
            else:
                # not in the expression library - next look in the appropriate 'aliases' file, if one is present
                # (but only if we have a package name in the first place)
                try:
                    stmt = 'from %s.%s.aliases import aliases' % (package_name,
                                                                  dataset_name)
                    exec(stmt)
                except ImportError:
                    aliases = []
                for a in aliases:
                    # for each definition, see if the alias is equal to the short_name.  If it is,
                    # then use that definition for the variable
                    v = VariableName(a)
                    if v.get_alias() == short_name:
                        return VariableFactory().get_variable(v,
                                                              dataset,
                                                              quiet=quiet,
                                                              debug=debug)

            lag_variable_parser = LagVariableParser()
            if lag_variable_parser.is_short_name_for_lag_variable(short_name):
                lag_attribute_name, lag_offset = lag_variable_parser.parse_lag_variable_short_name(
                    short_name)
                true_short_name = "VVV_lagLLL"
                substrings = (package_name, lag_attribute_name, lag_offset,
                              dataset_name, index_name)
                opus_path = 'opus_core.variables'

            else:
                if package_name is None:
                    raise LookupError(
                        "Incomplete variable specification for '%s.%s' (missing package name, "
                        "and the variable is neither in the expression library nor a lag variable)."
                        % (dataset_name, short_name))

                opus_path = '%s.%s' % (package_name, dataset_name)

                true_short_name, substrings = VariableFamilyNameTranslator().\
                        get_translated_variable_name_and_substring_arguments(opus_path, short_name)

            module = '%s.%s' % (opus_path, true_short_name)

            # Note that simply checking for the .py module file would not
            # be safe here, as objects could be instantiated in __init__.py files.
            try:
                ev = "from %s import %s as variable_subclass" % (
                    module, true_short_name)
                debug.print_debug("Evaluating '" + ev + "'.", 12)
                exec(ev)
                debug.print_debug("Successful.", 12)
            except ImportError, e:
                if not quiet:
                    from opus_core.simulation_state import SimulationState
                    time = SimulationState().get_current_time()
                    desc = '\n'.join((
                        "Opus variable '%s' does not exist for dataset '%s' in year %s. "
                        "The following error occured when finally trying to import "
                        "the variable '%s' from the Python module "
                        "'%s':",
                        "%s",
                    )) % (true_short_name, opus_path, time, true_short_name,
                          module,
                          indent_text(
                              formatPlainTextExceptionInfoWithoutLog('')))
                    raise NameError(desc)
                return None
class EmploymentTransitionModel(Model):
    """Creates and removes jobs from job_set."""

    model_name = "Employment Transition Model"
    location_id_name_default = "grid_id"
    variable_package_default = "urbansim"

    def __init__(self, location_id_name=None, variable_package=None, dataset_pool=None, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = self.location_id_name_default
        self.variable_package = self.variable_package_default
        if location_id_name is not None:
            self.location_id_name = location_id_name
        if variable_package is not None:
            self.variable_package = variable_package
        self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"])

    def run(self, year, job_set, control_totals, job_building_types, data_objects=None, resources=None):
        self._do_initialize_for_run(job_set, job_building_types, data_objects)
        idx = where(control_totals.get_attribute("year")==year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(job_set)
        return self._update_job_set(job_set)
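
    # Minimal run sketch (dataset names are illustrative; 'storage' is any Opus storage
    # holding the control-total table used by prepare_for_run below):
    #
    #   model = EmploymentTransitionModel()
    #   control_totals = model.prepare_for_run(storage)
    #   difference = model.run(year=2005, job_set=jobs, control_totals=control_totals,
    #                          job_building_types=job_building_types)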
        
    def _do_initialize_for_run(self, job_set, job_building_types, data_objects=None):
        self.max_id = job_set.get_id_attribute().max()
        self.job_size = job_set.size()
        self.job_id_name = job_set.get_id_name()[0]
        self.new_jobs = {
            self.location_id_name:array([], dtype=job_set.get_data_type(self.location_id_name, int32)),
            "sector_id":array([], dtype=job_set.get_data_type("sector_id", int32)),
            self.job_id_name:array([], dtype=job_set.get_data_type(self.job_id_name, int32)),
            "building_type":array([], dtype=job_set.get_data_type("building_type", int8))
                    }
        self.remove_jobs = array([], dtype=int32)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.dataset_pool.add_datasets_if_not_included({job_building_types.get_dataset_name():job_building_types})
        self.available_building_types = job_building_types.get_id_attribute()

    def _compute_sector_variables(self, sectors, job_set):
        compute_resources = Resources({"debug":self.debug})
        job_set.compute_variables(
            map(lambda x: "%s.%s.is_in_employment_sector_%s_home_based"
                    % (self.variable_package, job_set.get_dataset_name(), x),
                sectors) +
            map(lambda x: "%s.%s.is_in_employment_sector_%s_non_home_based"
                    % (self.variable_package, job_set.get_dataset_name(), x),
                sectors) + ["is_non_home_based_job", "is_home_based_job"],
            dataset_pool = self.dataset_pool,
            resources = compute_resources)
        
    def _do_run_for_this_year(self, job_set):
        building_type = job_set.get_attribute("building_type")
        sectors = unique(self.control_totals_for_this_year.get_attribute("sector_id"))
        self._compute_sector_variables(sectors, job_set)
        for sector in sectors:
            isector = where(self.control_totals_for_this_year.get_attribute("sector_id") == sector)[0]
            total_hb_jobs = self.control_totals_for_this_year.get_attribute("total_home_based_employment")[isector]
            total_nhb_jobs = self.control_totals_for_this_year.get_attribute("total_non_home_based_employment")[isector]
            is_in_sector_hb = job_set.get_attribute("is_in_employment_sector_%s_home_based" % sector)
            is_in_sector_nhb = job_set.get_attribute("is_in_employment_sector_%s_non_home_based" % sector)
            diff_hb = int(total_hb_jobs - is_in_sector_hb.astype(int8).sum())
            diff_nhb = int(total_nhb_jobs - is_in_sector_nhb.astype(int8).sum())
            if diff_hb < 0: # home based jobs to be removed
                w = where(is_in_sector_hb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_hb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff_hb)-size_non_placed))))
            if diff_nhb < 0: # non home based jobs to be removed
                w = where(is_in_sector_nhb == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(job_set, w, -1*diff_nhb,
                                                         self.location_id_name)
                self.remove_jobs = concatenate((self.remove_jobs, non_placed,
                                           sample_noreplace(sample_array, max(0,abs(diff_nhb)-size_non_placed))))

            if diff_hb > 0: # home based jobs to be created
                self.new_jobs[self.location_id_name] = concatenate((self.new_jobs[self.location_id_name],
                                   zeros((diff_hb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"] = concatenate((self.new_jobs["sector_id"],
                                   (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_hb))))
                if 1 in is_in_sector_hb:
                    building_type_distribution = array(ndimage_sum(is_in_sector_hb,
                                                                    labels=building_type,
                                                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_home_based_job"): # take the building type distribution from the whole region
                    building_type_distribution = array(ndimage_sum(
                                                                job_set.get_attribute("is_home_based_job"),
                                                                labels=building_type,
                                                                index=self.available_building_types))
                else: # there are no home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(self.available_building_types.size)
                    building_type_distribution = building_type_distribution/building_type_distribution.sum()
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_hb, building_type_distribution/
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"],
                            sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id + diff_hb
                self.new_jobs[self.job_id_name] = concatenate((self.new_jobs[self.job_id_name],
                                                     arange(self.max_id+1, new_max_id+1)))
                self.max_id = new_max_id

            if diff_nhb > 0: # non home based jobs to be created
                self.new_jobs[self.location_id_name]=concatenate((self.new_jobs[self.location_id_name],
                                     zeros((diff_nhb,), dtype=self.new_jobs[self.location_id_name].dtype.type)))
                self.new_jobs["sector_id"]=concatenate((self.new_jobs["sector_id"],
                                           (resize(array([sector], dtype=self.new_jobs["sector_id"].dtype.type), diff_nhb))))
                if 1 in is_in_sector_nhb:
                    building_type_distribution = array(ndimage_sum(is_in_sector_nhb,
                                                                    labels=building_type,
                                                                    index=self.available_building_types))
                elif 1 in job_set.get_attribute("is_non_home_based_job"): # take the building type distribution from the whole region
                    building_type_distribution = array(ndimage_sum(
                                                        job_set.get_attribute("is_non_home_based_job"),
                                                        labels=building_type,
                                                        index=self.available_building_types))
                else: # there are no non-home-based jobs in the region, take uniform distribution
                    building_type_distribution = ones(self.available_building_types.size)
                    building_type_distribution = building_type_distribution/building_type_distribution.sum()
                sampled_building_types = probsample_replace(
                    self.available_building_types, diff_nhb, building_type_distribution/
                    float(building_type_distribution.sum()))
                self.new_jobs["building_type"] = concatenate((self.new_jobs["building_type"],
                                        sampled_building_types.astype(self.new_jobs["building_type"].dtype.type)))
                new_max_id = self.max_id+diff_nhb
                self.new_jobs[self.job_id_name]=concatenate((self.new_jobs[self.job_id_name], arange(self.max_id+1, 
                                                                                                     new_max_id+1)))
                self.max_id = new_max_id

    def _update_job_set(self, job_set):
        job_set.remove_elements(self.remove_jobs)
        job_set.add_elements(self.new_jobs, require_all_attributes=False)
        difference = job_set.size()-self.job_size
        self.debug.print_debug("Difference in number of jobs: %s (original %s,"
            " new %s, created %s, deleted %s)"
                % (difference,
                   self.job_size,
                   job_set.size(),
                   self.new_jobs[self.job_id_name].size,
                   self.remove_jobs.size),
            3)
        self.debug.print_debug("Number of unplaced jobs: %s"
            % where(job_set.get_attribute(self.location_id_name) <=0)[0].size,
            3)
        return difference

    def prepare_for_run(self, storage, **kwargs):
        from urbansim.datasets.control_total_dataset import ControlTotalDataset
        control_totals = ControlTotalDataset(in_storage=storage, what="employment")
        sample_control_totals(storage, control_totals, **kwargs)
        return control_totals
class HouseholdTransitionModel(Model):
    """Creates and removes households from household_set. New households are duplicated from the existing households, keeping 
       the joint distribution of all characteristics. 
    """

    model_name = "Household Transition Model"

    def __init__(self, location_id_name="grid_id", dataset_pool=None, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = location_id_name
        self.dataset_pool = self.create_dataset_pool(dataset_pool, ["urbansim", "opus_core"])

    def run(self, year, household_set, control_totals, characteristics, resources=None):
        self._do_initialize_for_run(household_set)
        control_totals.get_attribute("total_number_of_households") # to make sure they are loaded
        self.characteristics = characteristics
        self.all_categories = self.characteristics.get_attribute("characteristic")
        self.all_categories = array(map(lambda x: x.lower(), self.all_categories))
        self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist()
        self.marginal_characteristic_names = copy(control_totals.get_id_name())
        index_year = self.marginal_characteristic_names.index("year")
        self.marginal_characteristic_names.remove("year")
        idx = where(control_totals.get_attribute("year")==year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(household_set)
        return self._update_household_set(household_set)
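
    # Minimal run sketch (household_set, control_totals and characteristics are
    # illustrative dataset objects):
    #
    #   model = HouseholdTransitionModel(location_id_name="grid_id")
    #   difference = model.run(year=2005, household_set=households,
    #                          control_totals=control_totals,
    #                          characteristics=characteristics)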
        
    def _update_household_set(self, household_set):
        index_of_duplicated_hhs = household_set.duplicate_rows(self.mapping_existing_hhs_to_new_hhs)
        household_set.modify_attribute(name=self.location_id_name, data=-1 * ones((index_of_duplicated_hhs.size,), 
                                                                              dtype=household_set.get_data_type(self.location_id_name)),
                                                                          index=index_of_duplicated_hhs)
        household_set.remove_elements(self.remove_households)
        if self.new_households[self.location_id_name].size > 0:
            max_id = household_set.get_id_attribute().max()
            self.new_households[self.household_id_name]=concatenate((self.new_households[self.household_id_name],
                                                             arange(max_id+1, max_id+self.new_households[self.location_id_name].size+1)))
            household_set.add_elements(self.new_households, require_all_attributes=False)

        difference = household_set.size()-self.household_size
        self.debug.print_debug("Difference in number of households: %s"
            " (original %s, new %s, created %s, deleted %s)"
                % (difference,
                   self.household_size,
                   household_set.size(),
                   self.new_households[self.household_id_name].size + self.mapping_existing_hhs_to_new_hhs.size,
                   self.remove_households.size),
            3)
        if self.location_id_name in household_set.get_attribute_names():
            self.debug.print_debug("Number of unplaced households: %s"
                % where(household_set.get_attribute(self.location_id_name) <=0)[0].size,
                3)
        return difference

    def _do_initialize_for_run(self, household_set):
        self.household_id_name = household_set.get_id_name()[0]
        self.new_households = {
           self.location_id_name:array([], dtype=household_set.get_data_type(self.location_id_name, int32)),
           self.household_id_name:array([], dtype=household_set.get_data_type(self.household_id_name, int32))
                   }
        self.remove_households = array([], dtype='int32')
        self.household_size = household_set.size()
        self.max_id = household_set.get_id_attribute().max()
        self.arrays_from_categories = {}
        self.arrays_from_categories_mapping = {}
        self.mapping_existing_hhs_to_new_hhs = array([], dtype=household_set.get_data_type(self.household_id_name, int32))
        
    def _do_run_for_this_year(self, household_set):
        self.household_set = household_set
        groups = self.control_totals_for_this_year.get_id_attribute()
        self.create_arrays_from_categories(self.household_set)

        all_characteristics = self.arrays_from_categories.keys()
        self.household_set.load_dataset_if_not_loaded(attributes = all_characteristics) # prevent lazy loading to save runtime
        idx_shape = []
        number_of_combinations=1
        num_attributes=len(all_characteristics)
        for iattr in range(num_attributes):
            attr = all_characteristics[iattr]
            max_bins = self.arrays_from_categories[attr].max()+1
            idx_shape.append(max_bins)
            number_of_combinations=number_of_combinations*max_bins
            if attr not in self.new_households.keys():
                self.new_households[attr] = array([], dtype=self.household_set.get_data_type(attr, float32))

        self.number_of_combinations = int(number_of_combinations)
        idx_tmp = indices(tuple(idx_shape))
        
        categories_index = zeros((self.number_of_combinations,num_attributes))

        for i in range(num_attributes): #create indices of all combinations
            categories_index[:,i] = idx_tmp[i].ravel()

        categories_index_mapping = {}
        for i in range(self.number_of_combinations):
            categories_index_mapping[tuple(categories_index[i,].tolist())] = i

        def get_category(values):
            bins = map(lambda x, y: self.arrays_from_categories[x][int(y)], all_characteristics, values)
            try:
                return categories_index_mapping[tuple(bins)]
            except KeyError, msg: 
                where_error = where(array(bins) == -1)[0]
                if where_error.size > 0:
                    raise KeyError, \
                        "Invalid value of %s for attribute %s. It is not included in the characteristics groups." % (
                                                                               array(values)[where_error], 
                                                                               array(all_characteristics)[where_error])
                raise KeyError, msg

        if num_attributes > 0:
            # the next array must be a copy of the household values, otherwise, it changes the original values
            values_array = reshape(array(self.household_set.get_attribute(all_characteristics[0])), (self.household_set.size(),1))
            if num_attributes > 1:
                for attr in all_characteristics[1:]:
                    values_array = concatenate((values_array, reshape(array(self.household_set.get_attribute(attr)),
                                                                      (self.household_set.size(),1))), axis=1)
            for i in range(values_array.shape[1]):
                if values_array[:,i].max() > 10000:
                    values_array[:,i] = values_array[:,i]/10
                values_array[:,i] = clip(values_array[:,i], 0, self.arrays_from_categories[all_characteristics[i]].size-1)
    
            # determine for each household to what category it belongs to
            self.household_categories = array(map(lambda x: get_category(x), values_array)) # performance bottleneck
    
            number_of_households_in_categories = array(ndimage_sum(ones((self.household_categories.size,)),
                                                                    labels=self.household_categories+1,
                                                                    index = arange(self.number_of_combinations)+1))
        else:
            # no marginal characteristics; consider just one group
            self.household_categories = zeros(self.household_set.size(), dtype='int32')
            number_of_households_in_categories = array([self.household_set.size()])

        g=arange(num_attributes)

        #iterate over marginal characteristics
        for group in groups:
            if groups.ndim <= 1: # there is only one group (no marginal char.)
                id = group
            else:
                id = tuple(group.tolist())
            group_element = self.control_totals_for_this_year.get_data_element_by_id(id)
            total = group_element.total_number_of_households
            for i in range(g.size):
                g[i] = eval("group_element."+self.arrays_from_categories.keys()[i])
            if g.size <= 0:
                l = ones((number_of_households_in_categories.size,))
            else:
                l = categories_index[:,0] == g[0]
                for i in range(1,num_attributes):
                    l = logical_and(l, categories_index[:,i] == g[i])
            # l has 1's for combinations of this group
            number_in_group = array(ndimage_sum(number_of_households_in_categories, labels=l, index = 1))
            diff = int(total - number_in_group)
            if diff < 0: # households to be removed
                is_in_group = l[self.household_categories]
                w = where(is_in_group)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(self.household_set, w, -1*diff,
                                                          self.location_id_name)
                self.remove_households = concatenate((self.remove_households, non_placed, sample_noreplace(sample_array,
                                                                                   max(0,abs(diff)-size_non_placed))))
            if diff > 0: # households to be created
                self._create_households(diff, l)
Example #46
0
class upc_sequence(object):
    """
        Invokes computation of utilities, probabilities and choices.
    """

    def __init__(self, utility_class=None, probability_class=None, choice_class=None, resources=None, debuglevel=0):
        """utility_class, probability_class, choice_class are objects of the corresponding classes.
            They must have a method 'run'.
        """
        self.utility_class = utility_class
        self.probability_class = probability_class
        self.choice_class = choice_class
        self.resources = resources
        if self.resources is None:
            self.resources = Resources()
        self.utilities = None
        self.probabilities = None
        self.choices = None
        self.debug = DebugPrinter(debuglevel)

    def run(self, data=None, coefficients=None, resources=None):
        local_resources = Resources()
        if resources:
            local_resources.merge(resources)
        last_result = self.compute_utilities(data=data, coefficients=coefficients, resources=local_resources)
        this_result = self.compute_probabilities(resources=local_resources)
        if this_result is not None:
            last_result = this_result
        this_result = self.compute_choices(resources=local_resources)
        if this_result is not None:
            last_result = this_result
        return last_result

    def compute_utilities(self, data=None, coefficients=None, resources=None):
        if self.utility_class is None:
            self.debug.print_debug("No utilities class given.", 10)
            return None
        self.debug.print_debug("compute_utilities ...", 3)
        self.utilities = self.utility_class.run(data, coefficients, resources=resources)
        return self.utilities

    def compute_probabilities(self, resources=None):
        if self.probability_class is None:
            self.debug.print_debug("No probabilities class given.", 10)
            return None
        self.debug.print_debug("compute_probabilities ...", 3)
        self.probabilities = self.probability_class.run(self.utilities, resources=resources)
        return self.probabilities

    def compute_choices(self, resources=None):
        if self.choice_class is None:
            self.debug.print_debug("No choices class given.", 10)
            return None
        self.debug.print_debug("compute_choices ...", 3)
        self.choices = self.choice_class.run(self.probabilities, resources=resources)
        return self.choices

    def get_utilities(self):
        return self.utilities

    def get_probabilities(self):
        return self.probabilities

    def write_probability_sums(self):
        self.probability_class.check_sum(self.probabilities)

    def get_choices(self):
        return self.choices

    def get_choice_histogram(self, min=None, max=None, bins=None):
        """Give an array that represents a histogram of choices."""
        if max is None:
            max = self.choices.max() + 1
        if min is None:
            min = self.choices.min()
        if bins is None:
            bins = max - min
        return histogram(self.get_choices(), min, max, bins)

    def get_probabilities_sum(self):
        """Return probabilities sum along the first axis.
        """
        probs = self.get_probabilities()
        if probs.ndim < 2:
            return probs.sum()
        return reshape(sum(probs, 0), probs.shape[1])

    def plot_choice_histograms(self, capacity, main=""):
        self.plot_histogram(numrows=2)
        self.plot_histogram_with_capacity(capacity)

    def plot_histogram(self, main="", numrows=1, numcols=1, fignum=1):
        """Plot a histogram of choices and probability sums. Expects probabilities as (at least) a 2D array.
        """
        from matplotlib.pylab import bar, xticks, yticks, title, text, axis, figure, subplot

        probabilities = self.get_probabilities()
        if probabilities.ndim < 2:
            raise StandardError, "probabilities must have at least 2 dimensions."
        alts = probabilities.shape[1]
        width_par = (1 / alts + 1) / 2.0
        choice_counts = self.get_choice_histogram(0, alts)
        sum_probs = self.get_probabilities_sum()

        subplot(numrows, numcols, fignum)
        bar(arange(alts), choice_counts, width=width_par)
        bar(arange(alts) + width_par, sum_probs, width=width_par, color="g")
        xticks(arange(alts))
        title(main)
        Axis = axis()
        text(
            alts + 0.5,
            -0.1,
            "\nchoices histogram (blue),\nprobabilities sum (green)",
            horizontalalignment="right",
            verticalalignment="top",
        )

    def plot_histogram_with_capacity(self, capacity, main=""):
        """Plot histogram of choices and capacities. The number of alternatives is determined
        from the second dimension of probabilities.
        """
        from matplotlib.pylab import bar, xticks, yticks, title, text, axis, figure, subplot

        probabilities = self.get_probabilities()
        if probabilities.ndim < 2:
            raise StandardError, "probabilities must have at least 2 dimensions."
        alts = self.probabilities.shape[1]
        width_par = (1 / alts + 1) / 2.0
        choice_counts = self.get_choice_histogram(0, alts)
        sum_probs = self.get_probabilities_sum()

        subplot(212)
        bar(arange(alts), choice_counts, width=width_par)
        bar(arange(alts) + width_par, capacity, width=width_par, color="r")
        xticks(arange(alts))
        title(main)
        Axis = axis()
        text(
            alts + 0.5,
            -0.1,
            "\nchoices histogram (blue),\ncapacities (red)",
            horizontalalignment="right",
            verticalalignment="top",
        )

    def show_plots(self, file=None):
        """Render the plots that have been generated.
        This method should be the last method called in the script, since it hands control to
        matplotlib's rendering backend.
        """
        from matplotlib.pylab import show, savefig

        if file is not None:
            savefig(file)
        else:
            show()

    def summary(self):
        logger.log_status("utilities")
        logger.log_status(self.get_utilities())
        logger.log_status("probabilities")
        logger.log_status(self.get_probabilities())
        logger.log_status("probabilities sums")
        self.write_probability_sums()
        logger.log_status("choices")
        logger.log_status(self.get_choices())

    def get_excess_demand(self, capacity):
        demand = self.get_probabilities_sum()
        return where(demand > capacity, demand - capacity, 0)

    def get_dependent_datasets(self):
        result = []
        if self.utility_class is not None:
            try:
                result = result + self.utility_class.get_dependent_datasets()
            except:
                pass
        if self.probability_class is not None:
            try:
                result = result + self.probability_class.get_dependent_datasets()
            except:
                pass
        if self.choice_class is not None:
            try:
                result = result + self.choice_class.get_dependent_datasets()
            except:
                pass
        return result
Example #48
0
class AgentRelocationModel(Model):
    """Chooses agents for relocation (according to probabilities computed by the probabilities class).
    It also includes all agents that are unplaced. If probabilities is set to None, only unplaced agents are chosen.
    The run method returns indices of the chosen agents.
    """
    def __init__(self,
                 probabilities="urbansim.rate_based_probabilities",
                 choices="opus_core.random_choices",
                 location_id_name="grid_id",
                 model_name="Agent Relocation Model",
                 debuglevel=0,
                 resources=None):
        self.model_name = model_name
        self.location_id_name = location_id_name
        self.debug = DebugPrinter(debuglevel)
        self.upc_sequence = None
        if probabilities is not None:
            self.upc_sequence = UPCFactory().get_model(
                utilities=None,
                probabilities=probabilities,
                choices=choices,
                debuglevel=debuglevel)
        self.resources = merge_resources_if_not_None(resources)

    def run(self,
            agent_set,
            resources=None,
            reset_attribute_value={},
            append_unplaced_agents_index=True):
        self.resources.merge(resources)

        if agent_set.size() <= 0:
            agent_set.get_id_attribute()
            if agent_set.size() <= 0:
                self.debug.print_debug("Nothing to be done.", 2)
                return array([], dtype='int32')

        if self.upc_sequence and (self.upc_sequence.probability_class.rate_set
                                  or self.resources.get(
                                      'relocation_rate', None)):
            self.resources.merge(
                {agent_set.get_dataset_name(): agent_set}
            )  #to be compatible with old-style one-relocation_probabilities-module-per-model
            self.resources.merge({'agent_set': agent_set})
            choices = self.upc_sequence.run(resources=self.resources)
            # choices have value 1 for agents that should be relocated, otherwise 0.
            movers_indices = where(choices > 0)[0]
        else:
            movers_indices = array([], dtype='int32')

        if reset_attribute_value and movers_indices.size > 0:
            for key, value in reset_attribute_value.items():
                agent_set.modify_attribute(name=key,
                                           data=resize(asarray(value),
                                                       movers_indices.size),
                                           index=movers_indices)
        if append_unplaced_agents_index:
            # add unplaced agents
            unplaced_agents = where(
                agent_set.get_attribute(self.location_id_name) <= 0)[0]
            movers_indices = unique(
                concatenate((movers_indices, unplaced_agents)))

        logger.log_status("Number of movers: " + str(movers_indices.size))
        return movers_indices
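
    # Hedged usage sketch (the rate storage/table names are hypothetical):
    #
    #   model = AgentRelocationModel(probabilities="urbansim.rate_based_probabilities",
    #                                choices="opus_core.random_choices",
    #                                location_id_name="grid_id")
    #   model.prepare_for_run(rate_storage=storage, rate_table="annual_relocation_rates_for_households")
    #   movers = model.run(household_set, reset_attribute_value={"grid_id": -1})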

    def prepare_for_run(self,
                        what=None,
                        rate_dataset_name=None,
                        rate_storage=None,
                        rate_table=None,
                        sample_rates=False,
                        n=100,
                        multiplicator=1,
                        flush_rates=True):
        """
        what - unused, argument kept to be compatible with old code 
        """
        from opus_core.datasets.dataset_factory import DatasetFactory
        from opus_core.session_configuration import SessionConfiguration

        if (rate_storage is None) or ((rate_table is None) and
                                      (rate_dataset_name is None)):
            return self.resources
        if not rate_dataset_name:
            rate_dataset_name = DatasetFactory().dataset_name_for_table(
                rate_table)

        rates = DatasetFactory().search_for_dataset(
            rate_dataset_name,
            package_order=SessionConfiguration().package_order,
            arguments={
                'in_storage': rate_storage,
                'in_table_name': rate_table,
            })

        if sample_rates:
            cache_storage = None
            if flush_rates:
                cache_storage = rate_storage
            rates.sample_rates(n=n,
                               cache_storage=cache_storage,
                               multiplicator=multiplicator)
        self.resources.merge(
            {rate_dataset_name: rates}
        )  #to be compatible with old-style one-relocation_probabilities-module-per-model
        self.resources.merge({'relocation_rate': rates})
        return self.resources


### In order to remove a circular dependency between this file and
### household_location_choice_model_creator, these unit tests were moved into
### urbansim.tests.test_agent_relocation_model.
class DevelopmentEventTransitionModel(Model):
    """From given types of development projects, e.g. 'residential' or 'commercial', create
    development events, one per gridcell. Only placed projects are considered.
    It returns an object of class DevelopmentEventDataset.
    """
    def __init__(self, resources=None, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.resources = resources
        self.model_name = "Development Event Transition Model"
        
    def run(self, developments, year=0, landuse_types=None, units=None, resources=None):
#        landuse_types = ['residential', 'commercial', 'industrial', 'governmental']
#        units=['residential_units', 'commercial_sqft','industrial_sqft','governmental_sqft']
        
        if not isinstance(resources, Resources):
            resources = Resources()

        grid_ids_for_project = array([], dtype=int32)
        if developments is not None:
            grid_ids_for_project = developments.get_attribute("grid_id")
        grid_ids_for_project = unique(grid_ids_for_project)
        grid_ids_for_project = grid_ids_for_project[where(grid_ids_for_project>0)]
        
        if len(grid_ids_for_project)==0: return
        sizes = grid_ids_for_project.size
        result_data = {"grid_id": grid_ids_for_project, 
                       "scheduled_year":(year*ones((sizes,), dtype=int16)),
                       "development_type_id": zeros((sizes,),dtype=int16),
                   }
        
        for unit in units:
            result_data[unit] = zeros((sizes,), dtype=int32)
        for project_type in landuse_types:
            result_data["%s_improvement_value" % project_type] = zeros((sizes,), dtype=int32)
            
        grid_idx=0
        for grid_id in grid_ids_for_project:
            w = where(developments.get_attribute('grid_id') == grid_id)[0]
            if w.size>0:
                result_data["development_type_id"][grid_idx] = \
                    developments.get_attribute_by_index("development_type_id", w[0])
                for unit_variable in units:
                    result_data[unit_variable][grid_idx] = \
                        developments.get_attribute_by_index(unit_variable , w).sum()
                    result_data["%s_improvement_value" % unit_variable.split('_')[0]][grid_idx] = \
                        developments.get_attribute_by_index("improvement_value", w).sum()
            grid_idx += 1
            
        storage = StorageFactory().get_storage('dict_storage')

        eventset_table_name = 'eventset'        
        storage.write_table(
                table_name=eventset_table_name,
                table_data=result_data,
            )
        
        eventset = DevelopmentEventDataset(
            in_storage = storage,
            in_table_name = eventset_table_name, 
            id_name=['grid_id', 'scheduled_year'],
            )
            
        self.debug.print_debug('Number of events: ' + str(grid_ids_for_project.size), 3)
        
        return eventset

    def prepare_for_run(self, model_configuration):
        all_types = []
        all_units = []
        for atype in model_configuration['landuse_development_types']:
            all_types.append(atype)
            all_units.append(model_configuration['landuse_development_types'][atype]['units'])
        return (all_types, all_units)


class ActiveDevelopmentsModel(Model):
    """
    
    If you have questions, contact Jesse Ayers at MAG:  [email protected]
    
    """

    model_name = "Active Developments Model"
    model_short_name = "ADM"

    def __init__(self, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.debuglevel = debuglevel

    def run(
        self,
        percent_active_development=100,
        build_minimum_units=False,
        year=None,
        start_year=None,
        dataset_pool=None,
        capacity_this_year_variable="mag_zone.active_development.capacity_this_year",
    ):
        # General TODO:
        #    - deal w/ "other_spaces" columns
        #    - look at generalizing the weight used when building units
        #    - build unit test for minimum build feature

        # LIST OF MODEL ASSUMPTIONS:
        #    - TODO: can i generalize the need for these pre-defined variables?
        #    - the model expects variables to exist that correspond to this naming pattern
        #      for every is_developing building_type_name in the building_types dataset:
        #        - total_<building_type_name>_units_col
        #        - occupied_<building_type_name>_units_col
        #    - building_type_name must be unique, lowercase, contain no spaces
        #    - target_vacancy.is_developing defines which building_types are considered

        # Minimum build feature
        #    - The user can specify 2 additional columns in the building_types dataset:
        #        - adm_minimum_annual_build_units
        #        - adm_minimum_annual_build_max_year
        #    - If these fields are present, and the "build_minimum_units" run option is set to True
        #        - The model will use these fields to build the minimum number of units annually,
        #          as specified in the building_types table, up to the maximum year specified there.  This feature
        #          is designed to simulate the case where, even when demand is too low to justify new units, some are built anyway.

        # CURRENT LIST OF KNOWN ISSUES:
        #    -
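
        # Illustrative (hypothetical) building_types row using these conventions:
        #     building_type_id=1, building_type_name='rsf', is_developing=1, is_residential=1,
        #     adm_minimum_annual_build_units=50, adm_minimum_annual_build_max_year=2020
        # With such a row the model expects the buildings dataset aliases to define
        # 'total_rsf_units_col' and 'occupied_rsf_units_col'.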

        # Get current simulation year
        if year is None:
            simulation_year = SimulationState().get_current_time()
        else:
            simulation_year = year

        # only run if start_year
        if start_year:
            if start_year > simulation_year:
                return

        # Get the percent_active_development
        # convert it to a float
        percent_active_development = percent_active_development / 100.0

        # Get the dataset pool
        if dataset_pool is None:
            dataset_pool = SessionConfiguration().get_dataset_pool()

        # get the active_developments dataset, subset it for actually active projects
        # compute some variables
        developments_dataset = dataset_pool.get_dataset("active_developments")
        active_developments_capacity = developments_dataset.compute_variables([capacity_this_year_variable])
        # TODO: need to further filter active developments, not only by start_year<=simulation_year,
        #       but also by whether they are built out, etc.
        active_developments_index = where(developments_dataset.get_attribute("start_year") <= simulation_year)[0]
        active_developments_capacity_this_year = active_developments_capacity[active_developments_index]
        # debug help
        self.debug.print_debug("\n*** BEGIN DEBUG INFO:", 1)
        self.debug.print_debug("len(active_developments_index) = %s" % len(active_developments_index), 1)
        self.debug.print_debug("len(active_developments_index) = %s" % len(active_developments_index), 1)
        self.debug.print_debug(
            "len(active_developments_capacity_this_year) = %s" % len(active_developments_capacity_this_year), 1
        )
        self.debug.print_debug("END DEBUG INFO ***\n", 1)

        # get the target_vacancy_rates dataset
        target_vacancy_rates_dataset = dataset_pool.get_dataset("target_vacancy")
        # get target vacancy rates for this simulation_year
        this_year_index = where(target_vacancy_rates_dataset.get_attribute("year") == simulation_year)[0]
        target_vacancies_for_this_year = DatasetSubset(target_vacancy_rates_dataset, this_year_index)
        # get some columns
        bldg_types = target_vacancies_for_this_year.get_attribute("building_type_id")
        tgt_vacancies = target_vacancies_for_this_year.get_attribute("target_vacancy")
        # get unique building types
        unique_building_types = unique1d(bldg_types)
        # build a dictionary containing building_type_id:{'target_vacancy_rate':<float>}
        developing_building_types_info = {}
        for unique_building_type in unique_building_types:
            unique_building_type_index = where(bldg_types == unique_building_type)[0]
            developing_building_types_info[unique_building_type] = {
                "target_vacancy_rate": tgt_vacancies[unique_building_type_index].mean()
            }
        # debug help
        if self.debuglevel > 0:
            self.debug_printer("developing_building_types_info", developing_building_types_info)

        # get the building_types dataset
        building_types_dataset = dataset_pool.get_dataset("building_type")
        # get the attribute names
        # I don't think this next line is used at all:
        # building_types_dataset_attribute_names = building_types_dataset.get_attribute_names()

        # get only the developing building types
        developing_types_index = where(building_types_dataset.get_attribute("is_developing") == 1)[0]
        developing_building_types_dataset = DatasetSubset(building_types_dataset, developing_types_index)
        # calculate active development capacity this simulation_year
        developing_building_type_ids = developing_building_types_dataset.get_attribute("building_type_id")
        building_type_names = developing_building_types_dataset.get_attribute("building_type_name")

        # add building_type_name to the dictionary
        # now the dictionary takes the form of:
        #    building_type_id:{'target_vacancy_rate':<float>,'building_type_name':<string>}
        counter = 0
        for developing_building_type_id in developing_building_type_ids:
            try:
                developing_building_types_info[developing_building_type_id]["building_type_name"] = building_type_names[
                    counter
                ]
                counter += 1
            except:
                logger.log_warning(
                    "You may have a mismatch in the building_type_ids between those in the target_vacancies dataset and the developing types in the building_types dataset."
                )
        # debug help
        if self.debuglevel > 0:
            self.debug_printer("developing_building_types_info", developing_building_types_info)

        # add 'is_residential' to the developing_building_types_info dictionary
        # now the dictionary takes the form of:
        #    building_type_id:{'target_vacancy_rate':<float>,'building_type_name':<string>,'is_residential':<integer>}
        for developing_building_type in developing_building_types_info:
            indx = where(building_types_dataset.get_attribute("building_type_id") == developing_building_type)[0]
            developing_building_types_info[developing_building_type][
                "is_residential"
            ] = building_types_dataset.get_attribute("is_residential")[indx][0]
        # debug help
        if self.debuglevel > 0:
            self.debug_printer("developing_building_types_info", developing_building_types_info)

        # add 'adm_minimum_annual_build_units' and 'adm_minimum_annual_build_max_year' to the developing_building_types_info dictionary
        # now the dictionary takes the form of:
        #    building_type_id:{'target_vacancy_rate':<float>,'building_type_name':<string>,'is_residential':<integer>,'adm_minimum_annual_build_units':<integer>, 'adm_minimum_annual_build_max_year':<integer>}
        if build_minimum_units:
            try:
                for developing_building_type in developing_building_types_info:
                    indx = where(building_types_dataset.get_attribute("building_type_id") == developing_building_type)[
                        0
                    ]
                    developing_building_types_info[developing_building_type][
                        "adm_minimum_annual_build_units"
                    ] = building_types_dataset.get_attribute("adm_minimum_annual_build_units")[indx][0]
                for developing_building_type in developing_building_types_info:
                    indx = where(building_types_dataset.get_attribute("building_type_id") == developing_building_type)[
                        0
                    ]
                    developing_building_types_info[developing_building_type][
                        "adm_minimum_annual_build_max_year"
                    ] = building_types_dataset.get_attribute("adm_minimum_annual_build_max_year")[indx][0]
            except:
                logger.log_error(
                    '\n\nYou have the option "build_minimum_units" set to "True" but appear to be missing the "adm_minimum_annual_build_units" and "adm_minimum_annual_build_max_year" units in your "building_types" dataset.\n'
                )
                return

        # build a list of total and occupied units variables to compute of the form
        #     ['occupied_rsf_units_col','total_rsf_units_col', ...]
        # The variables that this section creates and computes need to be defined in the buildings
        #     dataset aliases.py file
        building_variables = []
        for building_type_id, dict_of_info in developing_building_types_info.iteritems():
            try:
                total, occupied = (
                    "total_%s_units_col" % dict_of_info["building_type_name"],
                    "occupied_%s_units_col" % dict_of_info["building_type_name"],
                )
                building_variables.append(total)
                building_variables.append(occupied)
            except:
                logger.log_warning(
                    "You may have a mismatch in the building_type_ids between those in the target_vacancies dataset and the developing types in the building_types dataset."
                )
        # debug help
        if self.debuglevel > 0:
            self.debug_printer("building_variables", building_variables)

        # get the buildings dataset
        buildings_dataset = dataset_pool.get_dataset("building")
        # compute total and occupied units variables
        buildings_dataset.compute_variables(building_variables)
        # sum up those variables into a dictionary of the form:
        #    {'occupied_rsf_units':<integer>, 'total_rsf_units':<integer>, ...}
        total_and_occupied_variable_sums = {}
        for building_variable in building_variables:
            summed_attribute = buildings_dataset.get_attribute("%s" % building_variable).sum()
            total_and_occupied_variable_sums[building_variable.replace("_col", "")] = summed_attribute
        # debug help
        if self.debuglevel > 0:
            self.debug_printer("total_and_occupied_variable_sums", total_and_occupied_variable_sums)

        # set up a table to log into
        status_log = PrettyTable()
        status_log.set_field_names(
            [  # "Type",
                "Name",
                "Occ Units",
                "Tot Units",
                "CurrentVR",
                "Target Units",
                "TargetVR",
                "Difference",
                "Max Act Dev Action",
                "Avail Act Dev",
                "Build Action",
            ]
        )

        # compute target units, vacancy rates, etc
        # go over each developing building type and compute target units, differences, total development required,
        #    available capacity in active_developments, and action to take in active_developments
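        # Worked example of the arithmetic below (hypothetical numbers):
        #     occupied_rsf_units = 900, target_vacancy_rate = 0.10
        #         -> target_rsf_units = round(900 / (1 - 0.10)) = 1000
        #     total_rsf_units = 950 -> rsf_diff = 1000 - 950 = 50
        #     with percent_active_development = 50% -> rsf_action = int(50 * 0.5) = 25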
        for developing_building_type in developing_building_types_info:
            # compute target variables
            # compute target variables into developing_building_types_info dict
            developing_building_types_info[developing_building_type][
                "target_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"]
            ] = int(
                round(
                    total_and_occupied_variable_sums[
                        "occupied_%s_units"
                        % developing_building_types_info[developing_building_type]["building_type_name"]
                    ]
                    / (1 - developing_building_types_info[developing_building_type]["target_vacancy_rate"])
                )
            )

            # compute difference variables
            # compute difference variables into developing_building_types_info dict
            developing_building_types_info[developing_building_type][
                "%s_diff" % developing_building_types_info[developing_building_type]["building_type_name"]
            ] = (
                developing_building_types_info[developing_building_type][
                    "target_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"]
                ]
                - total_and_occupied_variable_sums[
                    "total_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"]
                ]
            )

            # compute action variables
            # if the computed difference is 0 or negative (no demand for units of this type):
            if (
                developing_building_types_info[developing_building_type][
                    "%s_diff" % developing_building_types_info[developing_building_type]["building_type_name"]
                ]
                < 1
            ):
                # consider whether to build the minimum units
                # check simulation year against maximum annual build year
                if (
                    build_minimum_units
                    and developing_building_types_info[developing_building_type]["adm_minimum_annual_build_max_year"]
                    >= simulation_year
                ):
                    # build minimum
                    developing_building_types_info[developing_building_type][
                        "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"]
                    ] = developing_building_types_info[developing_building_type]["adm_minimum_annual_build_units"]
                else:
                    # build nothing
                    developing_building_types_info[developing_building_type][
                        "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"]
                    ] = 0
            # the computed difference is positive (demand for units of this type)
            # decide how much to build, the actual number demanded, or the minimum
            else:
                # compute the difference * the percent_active_development
                diff_with_pct_active = int(
                    developing_building_types_info[developing_building_type][
                        "%s_diff" % developing_building_types_info[developing_building_type]["building_type_name"]
                    ]
                    * percent_active_development
                )
                # if the diff_with_pct_active is greater than the minimum development:
                if (
                    build_minimum_units
                    and diff_with_pct_active
                    > developing_building_types_info[developing_building_type]["adm_minimum_annual_build_units"]
                ):
                    # just build the diff_with_pct_active
                    developing_building_types_info[developing_building_type][
                        "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"]
                    ] = diff_with_pct_active
                # diff_with_pct_active < minimum build and the max year for annual build has not passed:
                elif (
                    build_minimum_units
                    and developing_building_types_info[developing_building_type]["adm_minimum_annual_build_max_year"]
                    >= simulation_year
                ):
                    # build the minimum
                    developing_building_types_info[developing_building_type][
                        "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"]
                    ] = developing_building_types_info[developing_building_type]["adm_minimum_annual_build_units"]
                # last case is the demand < minimum, but the simulation year > max year:
                else:
                    # build the diff_with_pct_active
                    developing_building_types_info[developing_building_type][
                        "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"]
                    ] = diff_with_pct_active

            # compute how much development is available in active developments
            # add this information to the developing_building_types_info dictionary:
            #     building_type_id:{'target_vacancy_rate':<float>,'building_type_name':<string>,'available_active_capacity_this_year':<integer>}
            indx = where(
                developments_dataset.get_attribute("building_type_id")[active_developments_index]
                == developing_building_type
            )
            developing_building_types_info[developing_building_type][
                "active_developments_capacity_this_year_index"
            ] = indx
            developing_building_types_info[developing_building_type][
                "available_active_capacity_this_year"
            ] = active_developments_capacity_this_year[indx].sum()

            # compute actual action to take
            action = developing_building_types_info[developing_building_type][
                "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"]
            ]
            available = developing_building_types_info[developing_building_type]["available_active_capacity_this_year"]
            actual_action = self.lesser(action, available)
            # revise actual action if minimum build units is in effect:
            if (
                build_minimum_units
                and developing_building_types_info[developing_building_type]["adm_minimum_annual_build_max_year"]
                >= simulation_year
            ):
                actual_action = self.greater(
                    actual_action,
                    developing_building_types_info[developing_building_type]["adm_minimum_annual_build_units"],
                )
            developing_building_types_info[developing_building_type]["action_to_take_this_year"] = actual_action

            # create status line for logging
            status_line = [  # developing_building_type,
                developing_building_types_info[developing_building_type]["building_type_name"],
                total_and_occupied_variable_sums[
                    "occupied_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"]
                ],
                total_and_occupied_variable_sums[
                    "total_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"]
                ],
                round(
                    1
                    - (
                        total_and_occupied_variable_sums[
                            "occupied_%s_units"
                            % developing_building_types_info[developing_building_type]["building_type_name"]
                        ]
                        / total_and_occupied_variable_sums[
                            "total_%s_units"
                            % developing_building_types_info[developing_building_type]["building_type_name"]
                        ]
                    ),
                    4,
                ),
                developing_building_types_info[developing_building_type][
                    "target_%s_units" % developing_building_types_info[developing_building_type]["building_type_name"]
                ],
                developing_building_types_info[developing_building_type]["target_vacancy_rate"],
                developing_building_types_info[developing_building_type][
                    "%s_diff" % developing_building_types_info[developing_building_type]["building_type_name"]
                ],
                developing_building_types_info[developing_building_type][
                    "%s_action" % developing_building_types_info[developing_building_type]["building_type_name"]
                ],
                developing_building_types_info[developing_building_type]["available_active_capacity_this_year"],
                actual_action,
            ]
            status_log.add_row(status_line)

        # print the status table to the log
        logger.log_status(status_log)

        # debug help
        if self.debuglevel > 0:
            self.debug_printer("developing_building_types_info", developing_building_types_info)

        # update the active_developments and buildings datasets with new units
        for developing_building_type in developing_building_types_info:
            if developing_building_types_info[developing_building_type]["action_to_take_this_year"] > 0:
                # update 'current_built_units' column in active_developments dataset

                # get the index of the records of the current developing_building_type
                indx = developing_building_types_info[developing_building_type][
                    "active_developments_capacity_this_year_index"
                ]
                # get the total number of units to build this year
                total_action = developing_building_types_info[developing_building_type]["action_to_take_this_year"]
                # compute the weight as build_out capacity - current_built_units
                buildout_capacity = developments_dataset.get_attribute("build_out_capacity")[active_developments_index][
                    indx
                ]
                current_built_units = developments_dataset.get_attribute("current_built_units")[
                    active_developments_index
                ][indx]
                weights = buildout_capacity - current_built_units
                weights_sum = float(weights.sum())
                weight_array = weights / weights_sum
                # distribute the total to build against the weight
                action_array = (total_action * weight_array).astype("int32")
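                # e.g. (hypothetical numbers): total_action = 100, weights = [25, 75]
                #     -> weight_array = [0.25, 0.75] -> action_array = [25, 75]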
                new_built_units = current_built_units + action_array
                # make sure we are not going to build more than the buildout_capacity
                check = buildout_capacity - new_built_units
                check_lt_zero = where(check < 0)
                if check_lt_zero[0].size > 0:
                    # We have a problem, set the new_built_units = the buildout_capacity
                    #  for those records where we are blowing the buildout of the development
                    new_built_units[check_lt_zero] = buildout_capacity[check_lt_zero]
                # update the current_built_units column with new values
                developments_building_ids = developments_dataset.get_attribute("building_id")
                building_ids_to_be_updated = developments_building_ids[active_developments_index][indx]
                if self.debuglevel > 0:
                    self.debug_printer("building_ids_to_be_updated", building_ids_to_be_updated)
                building_ids_to_be_updated_index_on_developments = in1d(
                    developments_building_ids, building_ids_to_be_updated
                )
                developments_dataset.set_values_of_one_attribute(
                    "current_built_units", new_built_units, building_ids_to_be_updated_index_on_developments
                )
                # debug help
                if self.debuglevel > 0:
                    self.debug_printer("new_built_units", new_built_units)

                # update the relevant units column on the buildings dataset with new units
                # debug help
                if self.debuglevel > 0:
                    self.debug_printer("building_ids_to_be_updated", building_ids_to_be_updated)
                building_ids_to_be_updated_index_on_buildings = buildings_dataset.get_id_index(
                    building_ids_to_be_updated
                )
                # debug help
                if self.debuglevel > 0:
                    self.debug_printer(
                        "building_ids_to_be_updated_index_on_buildings", building_ids_to_be_updated_index_on_buildings
                    )
                if developing_building_types_info[developing_building_type]["is_residential"]:
                    buildings_dataset.set_values_of_one_attribute(
                        "residential_units", new_built_units, building_ids_to_be_updated_index_on_buildings
                    )
                else:
                    buildings_dataset.set_values_of_one_attribute(
                        "non_residential_sqft", new_built_units, building_ids_to_be_updated_index_on_buildings
                    )

    def debug_printer(self, name, item_to_print):
        self.debug.print_debug("\n*** BEGIN DEBUG INFO:", self.debuglevel)
        self.debug.print_debug("Printing: %s" % name, self.debuglevel)
        if isinstance(item_to_print, dict):
            try:
                from json import dumps

                self.debug.print_debug(dumps(item_to_print, indent=4), self.debuglevel)
            except:
                for key1, value1 in item_to_print.iteritems():
                    self.debug.print_debug("primary dict key = %s" % key1, 1)
                    for key2, value2 in value1.iteritems():
                        self.debug.print_debug("%s : %s" % (key2, value2), 1)
        else:
            self.debug.print_debug(item_to_print, self.debuglevel)
        self.debug.print_debug("END DEBUG INFO ***\n", self.debuglevel)

    def lesser(self, x, y):
        if x - y > 0:
            return y
        else:
            return x

    def greater(self, x, y):
        if x - y < 0:
            return y
        else:
            return x


class HouseholdTransitionModel(Model):
    """Creates and removes households from household_set. New households are duplicated from the existing households, keeping 
       the joint distribution of all characteristics. 
    """

    model_name = "Household Transition Model"

    def __init__(self,
                 location_id_name="grid_id",
                 dataset_pool=None,
                 debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        self.location_id_name = location_id_name
        self.dataset_pool = self.create_dataset_pool(dataset_pool,
                                                     ["urbansim", "opus_core"])

    def run(self,
            year,
            household_set,
            control_totals,
            characteristics,
            resources=None):
        self._do_initialize_for_run(household_set)
        control_totals.get_attribute(
            "total_number_of_households")  # to make sure they are loaded
        self.characteristics = characteristics
        self.all_categories = self.characteristics.get_attribute(
            "characteristic")
        self.all_categories = array(
            map(lambda x: x.lower(), self.all_categories))
        self.scaled_characteristic_names = get_distinct_names(
            self.all_categories).tolist()
        self.marginal_characteristic_names = copy(control_totals.get_id_name())
        index_year = self.marginal_characteristic_names.index("year")
        self.marginal_characteristic_names.remove("year")
        idx = where(control_totals.get_attribute("year") == year)[0]
        self.control_totals_for_this_year = DatasetSubset(control_totals, idx)
        self._do_run_for_this_year(household_set)
        return self._update_household_set(household_set)

    def _update_household_set(self, household_set):
        index_of_duplicated_hhs = household_set.duplicate_rows(
            self.mapping_existing_hhs_to_new_hhs)
        household_set.modify_attribute(
            name=self.location_id_name,
            data=-1 * ones(
                (index_of_duplicated_hhs.size, ),
                dtype=household_set.get_data_type(self.location_id_name)),
            index=index_of_duplicated_hhs)
        household_set.remove_elements(self.remove_households)
        if self.new_households[self.location_id_name].size > 0:
            max_id = household_set.get_id_attribute().max()
            self.new_households[self.household_id_name] = concatenate(
                (self.new_households[self.household_id_name],
                 arange(
                     max_id + 1, max_id +
                     self.new_households[self.location_id_name].size + 1)))
            household_set.add_elements(self.new_households,
                                       require_all_attributes=False)

        difference = household_set.size() - self.household_size
        self.debug.print_debug(
            "Difference in number of households: %s"
            " (original %s, new %s, created %s, deleted %s)" %
            (difference, self.household_size, household_set.size(),
             self.new_households[self.household_id_name].size +
             self.mapping_existing_hhs_to_new_hhs.size,
             self.remove_households.size), 3)
        if self.location_id_name in household_set.get_attribute_names():
            self.debug.print_debug(
                "Number of unplaced households: %s" %
                where(household_set.get_attribute(self.location_id_name) <= 0)
                [0].size, 3)
        return difference

    def _do_initialize_for_run(self, household_set):
        self.household_id_name = household_set.get_id_name()[0]
        self.new_households = {
            self.location_id_name:
            array([],
                  dtype=household_set.get_data_type(self.location_id_name,
                                                    int32)),
            self.household_id_name:
            array([],
                  dtype=household_set.get_data_type(self.household_id_name,
                                                    int32))
        }
        self.remove_households = array([], dtype='int32')
        self.household_size = household_set.size()
        self.max_id = household_set.get_id_attribute().max()
        self.arrays_from_categories = {}
        self.arrays_from_categories_mapping = {}
        self.mapping_existing_hhs_to_new_hhs = array(
            [],
            dtype=household_set.get_data_type(self.household_id_name, int32))

    def _do_run_for_this_year(self, household_set):
        self.household_set = household_set
        groups = self.control_totals_for_this_year.get_id_attribute()
        self.create_arrays_from_categories(self.household_set)

        all_characteristics = self.arrays_from_categories.keys()
        self.household_set.load_dataset_if_not_loaded(
            attributes=all_characteristics
        )  # prevents from lazy loading to save runtime
        idx_shape = []
        number_of_combinations = 1
        num_attributes = len(all_characteristics)
        for iattr in range(num_attributes):
            attr = all_characteristics[iattr]
            max_bins = self.arrays_from_categories[attr].max() + 1
            idx_shape.append(max_bins)
            number_of_combinations = number_of_combinations * max_bins
            if attr not in self.new_households.keys():
                self.new_households[attr] = array(
                    [], dtype=self.household_set.get_data_type(attr, float32))

        self.number_of_combinations = int(number_of_combinations)
        idx_tmp = indices(tuple(idx_shape))

        categories_index = zeros((self.number_of_combinations, num_attributes))

        for i in range(num_attributes):  #create indices of all combinations
            categories_index[:, i] = idx_tmp[i].ravel()

        categories_index_mapping = {}
        for i in range(self.number_of_combinations):
            categories_index_mapping[tuple(categories_index[i, ].tolist())] = i

        def get_category(values):
            bins = map(lambda x, y: self.arrays_from_categories[x][int(y)],
                       all_characteristics, values)
            try:
                return categories_index_mapping[tuple(bins)]
            except KeyError, msg:
                where_error = where(array(bins) == -1)[0]
                if where_error.size > 0:
                    raise KeyError, \
                        "Invalid value of %s for attribute %s. It is not included in the characteristics groups." % (
                                                                               array(values)[where_error],
                                                                               array(all_characteristics)[where_error])
                raise KeyError, msg

        if num_attributes > 0:
            # the next array must be a copy of the household values, otherwise, it changes the original values
            values_array = reshape(
                array(self.household_set.get_attribute(
                    all_characteristics[0])), (self.household_set.size(), 1))
            if num_attributes > 1:
                for attr in all_characteristics[1:]:
                    values_array = concatenate(
                        (values_array,
                         reshape(array(self.household_set.get_attribute(attr)),
                                 (self.household_set.size(), 1))),
                        axis=1)
            for i in range(values_array.shape[1]):
                if values_array[:, i].max() > 10000:
                    values_array[:, i] = values_array[:, i] / 10
                values_array[:, i] = clip(
                    values_array[:, i], 0,
                    self.arrays_from_categories[all_characteristics[i]].size -
                    1)

            # determine for each household to what category it belongs to
            self.household_categories = array(
                map(lambda x: get_category(x),
                    values_array))  # performance bottleneck

            number_of_households_in_categories = array(
                ndimage_sum(ones((self.household_categories.size, )),
                            labels=self.household_categories + 1,
                            index=arange(self.number_of_combinations) + 1))
        else:
            # no marginal characteristics; consider just one group
            self.household_categories = zeros(self.household_set.size(),
                                              dtype='int32')
            number_of_households_in_categories = array(
                [self.household_set.size()])

        g = arange(num_attributes)

        #iterate over marginal characteristics
        for group in groups:
            if groups.ndim <= 1:  # there is only one group (no marginal char.)
                id = group
            else:
                id = tuple(group.tolist())
            group_element = self.control_totals_for_this_year.get_data_element_by_id(
                id)
            total = group_element.total_number_of_households
            for i in range(g.size):
                g[i] = eval("group_element." +
                            self.arrays_from_categories.keys()[i])
            if g.size <= 0:
                l = ones((number_of_households_in_categories.size, ))
            else:
                l = categories_index[:, 0] == g[0]
                for i in range(1, num_attributes):
                    l = logical_and(l, categories_index[:, i] == g[i])
            # l has 1's for combinations of this group
            number_in_group = array(
                ndimage_sum(number_of_households_in_categories,
                            labels=l,
                            index=1))
            diff = int(total - number_in_group)
            if diff < 0:  # households to be removed
                is_in_group = l[self.household_categories]
                w = where(is_in_group)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(self.household_set, w, -1*diff,
                                                          self.location_id_name)
                self.remove_households = concatenate(
                    (self.remove_households, non_placed,
                     sample_noreplace(sample_array,
                                      max(0,
                                          abs(diff) - size_non_placed))))
            if diff > 0:  # households to be created
                self._create_households(diff, l)
Example #52
class RateBasedModel(Model):
    """Chooses agents for relocation (according to probabilities computed by the probabilities class).
    It includes all agents that are unplaced. If probabilities is set to None, only unplaced agents are chosen.
    The run method returns indices of the chosen agents.
    """
    model_name = 'Rate Based Model'
    
    def __init__(self,
                 probabilities = "opus_core.upc.rate_based_probabilities",
                 choices = "opus_core.random_choices",
                 model_name = None,
                 debuglevel=0,
                 resources=None
                 ):
        if model_name is not None:
            self.model_name = model_name
        self.debug = DebugPrinter(debuglevel)
        self.upc_sequence = None
        if probabilities is not None:
            self.upc_sequence = UPCFactory().get_model(utilities=None,
                                                       probabilities=probabilities,
                                                       choices=choices,
                                                       debuglevel=debuglevel)
        self.resources = merge_resources_if_not_None(resources)
        
    def run(self, agent_set, 
            resources=None, 
            reset_attribute_value={}):
        self.resources.merge(resources)
        
        if agent_set.size()<=0:
            agent_set.get_id_attribute()
            if agent_set.size()<= 0:
                self.debug.print_debug("Nothing to be done.",2)
                return array([], dtype='int32')

        if self.upc_sequence and (self.upc_sequence.probability_class.rate_set or self.resources.get('rate_set', None)):
            self.resources.merge({agent_set.get_dataset_name():agent_set}) #to be compatible with old-style one-relocation_probabilities-module-per-model
            self.resources.merge({'agent_set':agent_set})
            choices = self.upc_sequence.run(resources=self.resources)
            # choices have value 1 for agents that should be relocated, otherwise 0.
            movers_indices = where(choices>0)[0]
        else:
            movers_indices = array([], dtype='int32')

        if reset_attribute_value and movers_indices.size > 0:
            for key, value in reset_attribute_value.items():
                agent_set.modify_attribute(name=key, 
                                           data=resize(asarray(value), movers_indices.size),
                                           index=movers_indices)            
        
        logger.log_status("Number of agents sampled based on rates: " + str(movers_indices.size))
        return movers_indices

    def prepare_for_run(self, what=None, 
                        rate_dataset_name="rate",
                        rate_storage=None, 
                        rate_table=None, 
                        probability_attribute=None,
                        sample_rates=False, 
                        n=100, 
                        multiplicator=1, 
                        flush_rates=True):
        """
        what - unused; argument kept for compatibility with old code
        """
        from opus_core.datasets.dataset_factory import DatasetFactory
        from opus_core.session_configuration import SessionConfiguration
        
        if (rate_storage is None) or ((rate_table is None) and (rate_dataset_name is None)):
            return self.resources
        if not rate_dataset_name:
            rate_dataset_name = DatasetFactory().dataset_name_for_table(rate_table)
        
        rates = DatasetFactory().search_for_dataset(rate_dataset_name,
                                                    package_order=SessionConfiguration().package_order,
                                                    arguments={'in_storage':rate_storage, 
                                                               'in_table_name':rate_table,
                                                           }
                                                    )
        if probability_attribute is not None:
            rates.probability_attribute = probability_attribute
        if sample_rates:
            cache_storage=None
            if flush_rates:
                cache_storage=rate_storage
            rates.sample_rates(n=n, cache_storage=cache_storage,
                                multiplicator=multiplicator)
        self.resources.merge({rate_dataset_name:rates}) #to be compatible with old-style one-relocation_probabilities-module-per-model
        self.resources.merge({'rate_set':rates})
        return self.resources


class BusinessTransitionModel(Model):
    """Creates and removes businesses from business_set."""
    
    model_name = "Business Transition Model"
    location_id_name = "building_id"
    variable_package = "sanfrancisco"
    
    def __init__(self, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)
        
    def run(self, year, business_set, 
            control_totals, 
            dataset_pool=None, 
            resources=None):
        self.business_id_name = business_set.get_id_name()[0]
        control_for_businesses = False # If this is False, it is controlled for jobs
        if "total_number_of_businesses" in control_totals.get_known_attribute_names():
            control_for_businesses = True
            control_totals.get_attribute("total_number_of_businesses")
        else:
            control_totals.get_attribute("total_number_of_jobs")
        idx = where(control_totals.get_attribute("year")==year)
        sectors = unique(control_totals.get_attribute_by_index("sector_id", idx))
        self.max_id = business_set.get_id_attribute().max()
        business_size = business_set.size()
        self.new_businesses = {self.location_id_name:array([], dtype='int32'), 
                          "sector_id":array([], dtype='int32'),
                          self.business_id_name:array([], dtype='int32'), 
                          "sqft":array([], dtype=int32),
                          "employment":array([], dtype='int32'),
                          "activity_id":array([], dtype='int32')}

        business_set.compute_variables(
            map(lambda x: "%s.%s.is_of_sector_%s" 
                    % (self.variable_package, business_set.get_dataset_name(), x), sectors), 
                dataset_pool=dataset_pool, resources = resources)
        self.remove_businesses = array([], dtype='int32')
            
        for sector in sectors:
            b_is_in_sector = business_set.get_attribute("is_of_sector_%s" % sector)
            if control_for_businesses:
                total_businesses = control_totals.get_data_element_by_id((year,sector)).total_number_of_businesses
                diff = int(total_businesses - b_is_in_sector.astype(int8).sum())
                self._do_sector_for_businesses(sector, diff, business_set, b_is_in_sector)
            else:
                total_jobs = control_totals.get_data_element_by_id((year,sector)).total_number_of_jobs
                diff = int(total_jobs - business_set.get_attribute_by_index("employment", b_is_in_sector).sum())
                self._do_sector_for_jobs(sector, diff, business_set, b_is_in_sector)
             
        business_set.remove_elements(self.remove_businesses)
        business_set.add_elements(self.new_businesses, require_all_attributes=False)
        difference = business_set.size()-business_size
        self.debug.print_debug("Difference in number of businesses: %s (original %s,"
            " new %s, created %s, deleted %s)" 
                % (difference, 
                   business_size, 
                   business_set.size(), 
                   self.new_businesses[self.business_id_name].size, 
                   self.remove_businesses.size), 
            3)
        self.debug.print_debug("Number of unplaced businesses: %s" 
            % where(business_set.get_attribute(self.location_id_name) <=0)[0].size, 
            3)
        return difference
    
    def _do_sector_for_businesses(self, sector, diff, business_set, is_in_sector):
        available_business_index = where(is_in_sector)[0]
        if diff < 0: #
            sample_array, non_placed, size_non_placed = \
                get_array_without_non_placed_agents(business_set, available_business_index, -1*diff, 
                                                     self.location_id_name)
            self.remove_businesses = concatenate((self.remove_businesses, non_placed, 
                                       sample_noreplace(sample_array, max(0,abs(diff)-size_non_placed))))
                            
        if diff > 0: #
            self.new_businesses[self.location_id_name]=concatenate((self.new_businesses[self.location_id_name],zeros((diff,))))
            self.new_businesses["sector_id"]=concatenate((self.new_businesses["sector_id"], sector*ones((diff,))))
            sampled_business = probsample_replace(available_business_index, diff, None)
            self.new_businesses["sqft"] = concatenate((self.new_businesses["sqft"],
                                                 business_set.get_attribute("sqft")[sampled_business]))
            self.new_businesses["employment"] = concatenate((self.new_businesses["employment"],
                                                       business_set.get_attribute("employment")[sampled_business]))
            self.new_businesses["activity_id"] = concatenate((self.new_businesses["activity_id"],
                                                       business_set.get_attribute("activity_id")[sampled_business]))
            
            new_max_id = self.max_id+diff
            self.new_businesses[self.business_id_name]=concatenate((self.new_businesses[self.business_id_name], 
                                                                    arange(self.max_id+1, new_max_id+1)))
            self.max_id = new_max_id
                
    def _do_sector_for_jobs(self, sector, diff, business_set, b_is_in_sector):
        # diff is a difference in jobs (not businesses)
        employment = business_set.get_attribute('employment')
        available_business_index = where(b_is_in_sector)[0]
        if diff < 0: #
            placed, non_placed, size_non_placed = \
                get_array_without_non_placed_agents(business_set, available_business_index, -1*available_business_index.size, 
                                                     self.location_id_name)
            consider_for_removing = concatenate((permutation(non_placed), permutation(placed)))
            empl_cumsum = cumsum(employment[consider_for_removing])
            remove_b = consider_for_removing[empl_cumsum <= abs(diff)]
            self.remove_businesses = concatenate((self.remove_businesses, remove_b))
                            
        if diff > 0: #
            total_empl_added = 0
            sampled_business = array([], dtype=int32)
            while total_empl_added < diff:
                consider_for_duplicating = permutation(available_business_index)
                empl_cumsum = cumsum(employment[consider_for_duplicating])
                sampled_business = concatenate((sampled_business, consider_for_duplicating[empl_cumsum+total_empl_added <= diff]))
                if empl_cumsum[-1]+total_empl_added > diff:
                    break
                total_empl_added += employment[sampled_business].sum()

            self.new_businesses[self.location_id_name]=concatenate((self.new_businesses[self.location_id_name],zeros((sampled_business.size,))))
            self.new_businesses["sector_id"]=concatenate((self.new_businesses["sector_id"], sector*ones((sampled_business.size,))))
            self.new_businesses["sqft"] = concatenate((self.new_businesses["sqft"],
                                                 business_set.get_attribute("sqft")[sampled_business]))
            self.new_businesses["employment"] = concatenate((self.new_businesses["employment"],
                                                       employment[sampled_business]))
            self.new_businesses["activity_id"] = concatenate((self.new_businesses["activity_id"],
                                                 business_set.get_attribute("activity_id")[sampled_business]))
            
            new_max_id = self.max_id+sampled_business.size
            self.new_businesses[self.business_id_name]=concatenate((self.new_businesses[self.business_id_name], 
                                                                    arange(self.max_id+1, new_max_id+1)))
            self.max_id = new_max_id
            
    def prepare_for_run(self, storage, in_table_name, id_name, **kwargs):
        from urbansim.datasets.control_total_dataset import ControlTotalDataset
        control_totals = ControlTotalDataset(in_storage=storage, 
                                             in_table_name=in_table_name,
                                             id_name=id_name
                                         )
        return control_totals
Example #54
class Variable(object):
    """Abstract base class for variables. Each variable implementation must be 
    a subclass of this class, placed in a module that has the same name 
    as the variable class. Each variable class is expected to contain a method "compute" 
    that takes one argument "arguments". It is of type Resources and can contain 
    anything that the compute method might need. 
    The 'compute' method  returns a result of the computation which should be 
    an array of size self.get_dataset().size().
    
    Each variable class can contain a method "dependencies" which returns a list 
    of attributes/variables that this class is dependent on.  The dependencies list 
    is a list of fully (or dataset) qualified variable names, one for each 
    dependent variable. All dependent datasets must be included in 'arguments'.    
    
    Each variable may have a pre- and post-check that will perform checks on the
    variable's inputs and the variable's results.  This allows each variable's
    implementation to specify a contract about what it does.  
    
    The 'check_variables' entry of the 'arguments' defines what variables to check
    (see method 'should_check'). If a variable is required to be checked, the 
    'pre_check' method for that variable is called before the variable's 'compute'
    method, and the 'post_check' method for that variable is called after the 
    variable's 'compute' method.  Both 'pre_check' and 'post_check' take 2
    arguments: values (the results from the 'compute' method), and 'arguments'.
    
    In case of using 'compute_with_dependencies' the datasets for which variables 
    are computed, are expected to have a method 'compute_variables' that 
    takes at least three arguments: name of the variable, package name and 
    an object of class Resources. This dataset method should
    use the Variable method 'compute_with_dependencies' in order to work recursively 
    through dependency trees (see compute_variables and _compute_one_variable of 
    opus_core.Dataset).
    
    The return type of this variable is defined by its _return_type property, which
    may have one of the following numpy types: "bool8", "int8", "uint8", "int16", 
    "uint16", "int32", "uint32", "int64", "uint64", "float32", "float64", "complex64",
    "complex128", "longlong".
    """
    _return_type = None
    
    def __new__(cls, *args, **kwargs):
        """Setup to automatically log the running time of the compute method."""
        
        an_instance = object.__new__(cls)
        compute_method = an_instance.compute_with_dependencies

        def logged_method (*req_args, **opt_args):
            logger.start_block(name=an_instance.name(), verbose=False)
            try:
                results = compute_method(*req_args, **opt_args)
                an_instance._do_flush_dependent_variables_if_required()
            finally:
                logger.end_block()
            return results       
            
        an_instance.compute_with_dependencies = logged_method
        return an_instance        
    
    def __init__(self):
        self.dependencies_list = None
        self.dataset = None
        self.number_of_compute_runs = 0
        try:
            self.debug = SessionConfiguration().get('debuglevel', 0)
        except:
            self.debug = 0
        if isinstance(self.debug, int):
            self.debug = DebugPrinter(self.debug)
            
    def name(self):
        return self.__module__
            
    def _do_flush_dependent_variables_if_required(self):
        try:
            if not SessionConfiguration().get('flush_variables', False):
                return
        except:
            return
        from opus_core.datasets.interaction_dataset import InteractionDataset
        dataset = self.get_dataset()
        dependencies = self.get_current_dependencies()
        my_dataset_name = dataset.get_dataset_name()
        for iattr in range(len(dependencies)): # iterate over dependent variables
            dep_item = dependencies[iattr][0]
            if isinstance(dep_item, str):
                depvar_name = VariableName(dep_item)
            else:
                depvar_name = dep_item.get_variable_name() # dep_item should be an instance of AttributeBox
            dataset_name = depvar_name.get_dataset_name()
            if dataset_name == my_dataset_name:
                ds = dataset
            else:
                ds = SessionConfiguration().get_dataset_from_pool(dataset_name)
                #ds = dataset_pool.get_dataset('dataset_name')
            if not isinstance(ds, InteractionDataset):
                short_name = depvar_name.get_alias()
                if short_name not in ds.get_id_name():   
                    ds.flush_attribute(depvar_name)
        
    def compute(self, dataset_pool):
        """Returns the result of this variable.  Private use only."""
        raise NotImplementedError("compute() method not implemented for this variable.")
    
    def is_lag_variable(self):
        """Not a lag variable unless this function has been overridden to return True"""
        return False

    def _compute_and_check(self, dataset_pool):
        if has_this_method(self, "pre_check"):
            self.debug.print_debug("Running pre_check() for " + self.__class__.__module__,4)
            self.pre_check(dataset_pool)
        else:
            self.debug.print_debug("No pre_check() defined for " + self.__class__.__module__,4)
        values = self.compute(dataset_pool)
        if has_this_method(self, "post_check"):
            self.debug.print_debug("Running post_check() for " + self.__class__.__module__,4)
            self.post_check(values, dataset_pool)
        else:
            self.debug.print_debug("No post_check() defined for " + self.__class__.__module__,4)
        return values
        
    def compute_with_dependencies(self, dataset_pool, arguments={}):
        self._solve_dependencies(dataset_pool)
        if self.should_check(arguments):
            self.debug.print_debug("Computing and checking " + self.__class__.__module__,3)
            values = self._compute_and_check(dataset_pool)
        else:
            values = self.compute(dataset_pool)
        self.number_of_compute_runs += 1
        if self._return_type:
            return self._cast_values(values, arguments)
        return values

    if longlong == int32:
        __long_size = 2**31 - 1
    else:
        __long_size = 2**63 - 1
        
    _max_storable_value = {"bool8":1,
                            "int8":2**7 - 1,
                            "uint8":2**8 - 1,
                            "int16":2**15 - 1,
                            "uint16":2**16 - 1,
                            "int32":2**31 - 1,
                            "uint32":2**32 - 1,
                            "int64":2**63 - 1,
                            "uint64":2**64 - 1,
                            "float32":3.40282346638528860e+38,
                            "float64":1.79769313486231570e+308,
                            "complex64":3.40282346638528860e+38,
                            "complex128":1.79769313486231570e+308,
                            "longlong":__long_size,
                            }
        
    def _cast_values(self, values, arguments):
        """Change the return values to be of type self._return_type.
        If "should_check" is defined, first check for 
        values that are too large for the destination type or
        integer wrap-around."""
        type = values.dtype.str
        if self._return_type == type:
            return values
        if self.should_check(arguments):
            max_value = ma.maximum(values)
            if max_value > self._max_storable_value[self._return_type]:
                max_value_str = str(max_value)
                logger.log_error("Variable '%s' is being cast to type '%s', but contains a value (%s) too large to fit into that type."
                                 % (self.name(), self._return_type, max_value_str))
        return values.astype(self._return_type)
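    # Note on _cast_values (hypothetical values): casting array([1, 300], dtype="int32")
    # to a _return_type of "int8" logs the error above when checking is enabled, because
    # 300 exceeds _max_storable_value["int8"] (127); astype("int8") would otherwise wrap
    # the value around silently.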

    def _solve_dependencies(self, dataset_pool):
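        # Compute every dependent variable on its owning dataset and replace the raw
        # dependency entry with the resulting (AttributeBox, version) pair.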
        dataset = self.get_dataset()
        my_dataset_name = dataset.get_dataset_name()
        dependencies_list = self.get_current_dependencies()
        for i in range(len(dependencies_list)): # compute dependent variables
            dep_item = dependencies_list[i][0]
            if isinstance(dep_item, str):
                depvar_name = VariableName(dep_item)
            else:
                depvar_name = dep_item.get_variable_name() # dep_item should be an instance of AttributeBox
            dataset_name = depvar_name.get_dataset_name()
            version = dependencies_list[i][1]
            if dataset_name == my_dataset_name:
                ds = dataset
            else:
                ds = dataset_pool.get_dataset(dataset_name)
            (new_versions, value) = ds.compute_variables_return_versions_and_final_value([(depvar_name, version)], dataset_pool)
            self.dependencies_list[i] = (ds._get_attribute_box(depvar_name), new_versions[0])

        
    def get_all_dependencies(self):
        """Return all variables and attributes needed to compute this variable.  
        This is returned as a list of tuples where the first element is either AttributeBox or 
        VariableName of the dependent variable and the second element is the version for 
        which this variable was computed.
        """ 
        def create_fake_dataset(dataset_name):
            storage = StorageFactory().get_storage('dict_storage')
            
            storage.write_table(
                table_name='fake_dataset',
                table_data={
                    'id':array([], dtype='int32')
                    }
                )
            
            dataset = Dataset(in_storage=storage, in_table_name='fake_dataset', dataset_name=dataset_name, id_name="id")
            return dataset
        
        result_others = []
        dependencies_list = self.get_current_dependencies()
        for i in range(len(dependencies_list)):
            dep_item = dependencies_list[i][0]
            version = dependencies_list[i][1]
            isprimary = 0
            if isinstance(dep_item, str):
                depvar_name = VariableName(dep_item)
                dataset_name = depvar_name.get_dataset_name()
                var = VariableFactory().get_variable(depvar_name, create_fake_dataset(dataset_name), 
                                                               quiet=True)
                result_others = result_others + [(depvar_name, version)]                                              
            else: # dep_item should be an instance of AttributeBox
                var = dep_item.get_variable_instance()           
                result_others = result_others + [(dep_item, version)]
                isprimary = dep_item.is_primary()
                
            if (var is not None) and (not isprimary):
                res = var.get_all_dependencies()
                result_others = result_others + res
        return result_others
        
    def get_dependencies(self):
        """Return variables and attributes needed to compute this variable.  
        This is returned as a list of tuples where the first element is the 
        name of the particular dataset and the second element is the variable 
        name. It does not work through the dependencies tree.
        """ 
        if has_this_method(self, "dependencies"):
            return self.dependencies()
        return []
    
    def add_dependencies(self, dep_list=[]):
        """Can be used within 'compute' method to add dependencies. It is performed only 
        when the compute method runs for the first time.
        dep_list can be either a list of character strings or a list of AttributeBoxes."""
        if self.number_of_compute_runs == 0:
            if isinstance(dep_list, str):
                dep_list = [dep_list]
            self.dependencies_list = self.dependencies_list + map(lambda x: (x, 0), dep_list)
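        # Example (hypothetical attribute name): calling
        # self.add_dependencies(["urbansim.gridcell.population"]) from within compute()
        # registers that attribute as an additional dependency (with version 0); the
        # number_of_compute_runs guard makes this happen on the first compute run only.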
            
    def add_and_solve_dependencies(self, dep_list=[], dataset_pool=None):
        """Calls 'add_dependencies' and if it is run for the first time, it also calls the 
        '_solve_dependencies' method."""
        self.add_dependencies(dep_list)
        if self.number_of_compute_runs == 0:
            self._solve_dependencies(dataset_pool)
        
    def get_current_dependencies(self):
        if self.dependencies_list is None:
            self.dependencies_list = map(lambda x: (x, 0), self.get_dependencies())
        return self.dependencies_list
        
    def do_check(self, condition_str, values):
        def condition(x):
            return eval(condition_str)

        # This is a bit ugly, but the upgrade from Python 2.3.5 to
        # Python 2.4 broke backward compatibility in regard to map and
        # numpy's rank-0 arrays. This attempts to detect a rank-0
        # array and convert it into something usable.
        try:
            try: len(values)
            except TypeError: values = array([values[()]])
        except: pass

        count = where(array(map(lambda x: not(condition(x)), values)) > 0)[0].size
        
        if (count > 0):
            logger.log_warning("Variable %s fails %d times on check %s" % 
                               (self.__class__.__module__, count, condition_str))
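        # Example (hypothetical values): do_check("x >= 0", array([1, -2, 3])) logs a
        # warning that the variable fails 1 time on check "x >= 0".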
                
    def should_check(self, arguments=None):
        """Return True if this variable should be checked, otherwise False. The information of what
        variables to check is provided in the 'arguments' entry "check_variables". 
        If "check_variables" is missing or is None or is an empty list, do no checks. 
        If "check_variables" is '*', check all variables.
        If "check_variables" is a list containing this variable's name, check this variable. 
        """
        if not isinstance(arguments, Resources):
            return False
        check_variables = arguments.get("check_variables", None)
        if check_variables is None:
            return False
        if (check_variables == '*') or \
           (isinstance(check_variables, list) and (len(check_variables) > 0) and 
            (self.__class__.__name__ in check_variables)):
            return True
        return False
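    # Usage note (hypothetical values): passing Resources({"check_variables": '*'}) as
    # 'arguments' to compute_with_dependencies() enables pre_check/post_check for every
    # variable, while Resources({"check_variables": ["jobs_per_capita"]}) checks only
    # the variable class with that name.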
     
    def are_dependent_variables_up_to_date(self, version):
        result = []  
        all_dependencies_list = self.get_all_dependencies()
        for variable, version  in all_dependencies_list:
            if isinstance(variable, AttributeBox):
                result.append(variable.is_version(version))
            else: # of type VariableName (means variable wasn't used yet)
                result.append(False)
        return result
        
    def get_highest_version_of_dependencies(self):
        dependencies_list = self.get_current_dependencies()
        if len(dependencies_list) <= 0:
            return 0
        versions = array(map(lambda x: x[1], dependencies_list))
        return versions.max()
    
    def set_dataset(self, dataset):
        self.dataset = dataset
    
    def get_dataset(self):
        return self.dataset

    def safely_divide_two_arrays(self, numerator, denominator, value_for_divide_by_zero=0.0):
        """Returns the result of numerator/denominator with the value_for_divide_by_zero 
        wherever denominator == 0.
        """
        return ma.filled(numerator / ma.masked_where(denominator == 0, denominator),
                      value_for_divide_by_zero)
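        # Example (hypothetical values): safely_divide_two_arrays(array([2., 4.]), array([1., 0.]))
        # returns array([2., 0.]): the element with denominator 0 is masked out and then
        # filled with value_for_divide_by_zero.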
    
    def safely_divide_two_attributes(self, numerator_name, denominator_name, value_for_divide_by_zero=0.0):
        """Returns the result of dividing the numerator_name attribute of this variable
        by the denominator_name attribute of this variable; return the value_for_divide_by_zero 
        wherever denominator == 0.
        """
        numerator = self.get_dataset().get_attribute(numerator_name)
        denominator = self.get_dataset().get_attribute(denominator_name)
        return self.safely_divide_two_arrays(numerator, denominator, value_for_divide_by_zero)
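# A minimal sketch (not from the original source) of how a concrete variable might use
# the contract documented in the Variable class above: 'dependencies' returns fully
# qualified attribute names, 'compute' returns a numpy array, and the optional
# 'pre_check'/'post_check' methods validate inputs and results. The dataset name
# "gridcell" and the attributes "number_of_jobs" and "population" are assumptions
# chosen only for illustration.
class jobs_per_capita(Variable):
    """Hypothetical example: jobs divided by population on each gridcell."""
    _return_type = "float32"

    def dependencies(self):
        # fully qualified names of the attributes this variable depends on
        return ["urbansim.gridcell.number_of_jobs",
                "urbansim.gridcell.population"]

    def pre_check(self, dataset_pool):
        # contract on the inputs: population must be non-negative
        self.do_check("x >= 0", self.get_dataset().get_attribute("population"))

    def compute(self, dataset_pool):
        # jobs / population, with 0 wherever population == 0
        return self.safely_divide_two_attributes("number_of_jobs", "population")

    def post_check(self, values, dataset_pool):
        # contract on the results: the ratio can never be negative
        self.do_check("x >= 0", values)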
class BusinessTransitionModel(Model):
    """Creates and removes businesses from business_set."""

    model_name = "Business Transition Model"
    location_id_name = "building_id"
    variable_package = "urbansim_parcel"

    def __init__(self, debuglevel=0):
        self.debug = DebugPrinter(debuglevel)

    def run(self,
            year,
            business_set,
            control_totals,
            data_objects=None,
            resources=None):
        business_id_name = business_set.get_id_name()[0]
        control_totals.get_attribute("total_number_of_businesses")
        idx = where(control_totals.get_attribute("year") == year)
        sectors = unique(
            control_totals.get_attribute_by_index("building_use_id", idx))
        max_id = business_set.get_id_attribute().max()
        business_size = business_set.size()
        new_businesses = {
            self.location_id_name: array([], dtype='int32'),
            "building_use_id": array([], dtype='int32'),
            business_id_name: array([], dtype='int32'),
            "sqft": array([], dtype=int32),
            "employees": array([], dtype=int32),
        }
        compute_resources = Resources(data_objects)
        #        compute_resources.merge({job_building_types.get_dataset_name():job_building_types, "debug":self.debug})
        business_set.compute_variables(map(
            lambda x: "%s.%s.is_sector_%s" %
            (self.variable_package, business_set.get_dataset_name(), x),
            sectors),
                                       resources=compute_resources)
        remove_businesses = array([], dtype='int32')

        for sector in sectors:
            total_businesses = control_totals.get_data_element_by_id(
                (year, sector)).total_number_of_businesses
            is_in_sector = business_set.get_attribute("is_sector_%s" % sector)
            diff = int(total_businesses - is_in_sector.astype(int8).sum())
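            # diff is the gap between the control total and the current number of businesses
            # in this sector: negative means businesses are removed below, positive means new
            # businesses are sampled (with replacement) from existing ones in the sector.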

            if diff < 0:  # remove businesses from this sector
                w = where(is_in_sector == 1)[0]
                sample_array, non_placed, size_non_placed = \
                    get_array_without_non_placed_agents(business_set, w, -1*diff,
                                                         self.location_id_name)
                remove_businesses = concatenate(
                    (remove_businesses, non_placed,
                     sample_noreplace(sample_array,
                                      max(0,
                                          abs(diff) - size_non_placed))))

            if diff > 0:  # create new businesses in this sector
                new_businesses[self.location_id_name] = concatenate(
                    (new_businesses[self.location_id_name],
                     zeros((diff, ), dtype="int32")))
                new_businesses["building_use_id"] = concatenate(
                    (new_businesses["building_use_id"], sector * ones(
                        (diff, ), dtype="int32")))

                available_business_index = where(is_in_sector)[0]
                sampled_business = probsample_replace(available_business_index,
                                                      diff, None)

                new_businesses["sqft"] = concatenate(
                    (new_businesses["sqft"],
                     business_set.get_attribute("sqft")[sampled_business]))
                new_businesses["employees"] = concatenate((
                    new_businesses["employees"],
                    business_set.get_attribute("employees")[sampled_business]))

                new_max_id = max_id + diff
                new_businesses[business_id_name] = concatenate(
                    (new_businesses[business_id_name],
                     arange(max_id + 1, new_max_id + 1)))
                max_id = new_max_id

        business_set.remove_elements(remove_businesses)
        business_set.add_elements(new_businesses, require_all_attributes=False)
        difference = business_set.size() - business_size
        self.debug.print_debug(
            "Difference in number of businesses: %s (original %s,"
            " new %s, created %s, deleted %s)" %
            (difference, business_size, business_set.size(),
             new_businesses[business_id_name].size, remove_businesses.size), 3)
        self.debug.print_debug(
            "Number of unplaced businesses: %s" %
            where(business_set.get_attribute(self.location_id_name) <= 0)
            [0].size, 3)
        return difference

    def prepare_for_run(self, storage, in_table_name, id_name, **kwargs):
        from urbansim.datasets.control_total_dataset import ControlTotalDataset
        control_totals = ControlTotalDataset(in_storage=storage,
                                             in_table_name=in_table_name,
                                             id_name=id_name)
        #        sample_control_totals(storage, control_totals, **kwargs)
        return control_totals
def create_from_parcel_and_development_template(parcel_dataset,
                                                development_template_dataset,
                                                parcel_index=None,
                                                template_index=None,
                                                filter_attribute=None,
                                                consider_constraints_as_rules=True,
                                                template_opus_path="urbansim_parcel.development_template",
                                                proposed_units_variable="urbansim_parcel.development_project_proposal.units_proposed",
                                                dataset_pool=None,
                                                resources=None):
    """create development project proposals from parcel and development_template_dataset,
    parcel_index - 1D array, indices of parcel_dataset. Status of the proposals is set to 'tentative'.
    template_index - index to templates that are available to create proposals;
    filter_attribute - variable that is used to filter proposals;
    
    If a development constraint table exists, create proposal dataset include only proposals that are allowed by constraints,
    otherwise, create a proposal dataset with Cartesian product of parcels x templates 
    """

    resources = Resources(resources)
    debug = resources.get("debug",  0)
    if not isinstance(debug, DebugPrinter):
        debug = DebugPrinter(debug)

    if parcel_index is not None and parcel_index.size <= 0:
        logger.log_warning("parcel index for creating development proposals is of size 0. No proposals will be created.")
        return None
        
    storage = StorageFactory().get_storage('dict_storage')
    current_year = SimulationState().get_current_time()
    
    def _get_data(parcel_ids, template_ids):
        return {
                "proposal_id": arange(1, parcel_ids.size+1, 1),
                "parcel_id" : parcel_ids,
                "template_id": template_ids,
                "start_year": array(parcel_ids.size*[current_year]),
                "status_id": resize(array([DevelopmentProjectProposalDataset.id_tentative], dtype="int16"), 
                    parcel_ids.size)
                }
        
    def _create_project_proposals(parcel_ids, template_ids):
        storage.write_table(table_name='development_project_proposals',
            table_data = _get_data(parcel_ids, template_ids)
            )
        development_project_proposals = DevelopmentProjectProposalDataset(resources=Resources(resources),
                                                                          dataset1 = parcel_dataset,
                                                                          dataset2 = development_template_dataset,
                                                                          index1 = parcel_index,
                                                                          index2 = template_index,
                                                                          in_storage=storage,
                                                                          in_table_name='development_project_proposals',
                                                                          )
        return development_project_proposals
    
    def _compute_filter(proposals):
        if filter_attribute is not None:
            proposals.compute_variables(filter_attribute, dataset_pool=dataset_pool,
                                                          resources=Resources(resources))
            filter_index = where(proposals.get_attribute(filter_attribute) > 0)[0]
            return filter_index
        return None
    
    def _subset_by_filter(proposals):
        filter_index = _compute_filter(proposals)
        if filter_index is not None:
            proposals.subset_by_index(filter_index, flush_attributes_if_not_loaded=False)
        return proposals


    if parcel_index is not None:
        index1 = parcel_index
    else:
        index1 = arange(parcel_dataset.size())

    if template_index is not None:
        index2 = template_index
    else:
        index2 = arange(development_template_dataset.size())

    has_constraint_dataset = True
    try:
        constraints = dataset_pool.get_dataset("development_constraint") 
        constraints.load_dataset_if_not_loaded()
    except:
        has_constraint_dataset = False

    if has_constraint_dataset:
        constraint_types = unique(constraints.get_attribute("constraint_type"))  #unit_per_acre, far etc
        development_template_dataset.compute_variables(map(lambda x: "%s.%s" % (template_opus_path, x), constraint_types), dataset_pool)
            
        parcel_dataset.get_development_constraints(constraints, dataset_pool, 
                                                   index=index1, 
                                                   consider_constraints_as_rules=consider_constraints_as_rules)
        generic_land_use_type_ids = development_template_dataset.compute_variables("urbansim_parcel.development_template.generic_land_use_type_id",
                                                       dataset_pool=dataset_pool)
    parcel_ids = parcel_dataset.get_id_attribute()
    template_ids = development_template_dataset.get_id_attribute()
    
    proposal_parcel_ids = array([],dtype="int32")
    proposal_template_ids = array([],dtype="int32")
    logger.start_block("Combine parcels, templates and constraints")
    for i_template in index2:
        this_template_id = template_ids[i_template]
        fit_indicator = ones(index1.size, dtype="bool8")
        if has_constraint_dataset:
            generic_land_use_type_id = generic_land_use_type_ids[i_template]
            for constraint_type, constraint in parcel_dataset.development_constraints[generic_land_use_type_id].iteritems():
                template_attribute = development_template_dataset.get_attribute(constraint_type)[i_template]  #density converted to constraint variable name
                if template_attribute == 0:
                    continue
                min_constraint = constraint[:, 0].copy()
                max_constraint = constraint[:, 1].copy()
                ## treat -1 as unconstrained
                w_unconstr = min_constraint == -1
                if w_unconstr.any():
                    min_constraint[w_unconstr] = template_attribute
                
                w_unconstr = max_constraint == -1
                if w_unconstr.any():
                    max_constraint[w_unconstr] = template_attribute

                fit_indicator = logical_and(fit_indicator, 
                                            logical_and(template_attribute >= min_constraint,
                                                        template_attribute <= max_constraint))
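                # Worked example (hypothetical values): with template_attribute = 10 and a
                # parcel whose constraint row is (min=-1, max=8), the -1 minimum is replaced
                # by 10, so the test (10 >= 10) and (10 <= 8) fails and that parcel drops
                # out of fit_indicator for this template.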
                

                if constraint_type == "units_per_acre":
                    res_units_capacity = parcel_dataset.get_attribute("parcel_sqft")[index1] * max_constraint / 43560.0 
                    debug.print_debug("template_id %s (GLU ID %s) max total residential capacity %s, %s of them fit constraints " % (this_template_id, generic_land_use_type_id, res_units_capacity.sum(), (res_units_capacity * fit_indicator).sum() ), 12)
                else:
                    non_res_capacity = parcel_dataset.get_attribute("parcel_sqft")[index1] * max_constraint
                    debug.print_debug("template_id %s (GLU ID %s) max total non residential capacity %s, %s of them fit constraints " % (this_template_id, generic_land_use_type_id, non_res_capacity.sum(), (non_res_capacity * fit_indicator).sum() ), 12)
                
        proposal_parcel_ids = concatenate((proposal_parcel_ids, parcel_ids[index1[fit_indicator]]))
        proposal_template_ids = concatenate( (proposal_template_ids, resize(array([this_template_id]), fit_indicator.sum())))
        
    logger.end_block()
    proposals = _create_project_proposals(proposal_parcel_ids, proposal_template_ids)
    proposals = _subset_by_filter(proposals)

    # eliminate proposals with zero units_proposed
    units_proposed = proposals.compute_variables([proposed_units_variable], dataset_pool = dataset_pool)
    where_up_greater_zero = where(units_proposed > 0)[0]
    if where_up_greater_zero.size > 0:
        proposals.subset_by_index(where_up_greater_zero, flush_attributes_if_not_loaded=False)
    
    logger.log_status("proposal set created with %s proposals." % proposals.size())
    #proposals.flush_dataset_if_low_memory_mode()
    return proposals
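# Usage sketch (hypothetical objects, not from the original source): parcel_dataset,
# development_template_dataset and dataset_pool would normally come from the running
# simulation; the names below are placeholders.
#
#   proposals = create_from_parcel_and_development_template(
#       parcel_dataset=parcels,
#       development_template_dataset=templates,
#       filter_attribute=None,   # or a qualified variable that is > 0 for allowed proposals
#       dataset_pool=dataset_pool)
#   # 'proposals' is a DevelopmentProjectProposalDataset in 'tentative' status containing
#   # only parcel/template combinations that pass the constraints and propose > 0 units.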
    def run(self,
            projects,
            types,
            units,
            year=0,
            location_id_name="grid_id",
            debuglevel=0):
        debug = DebugPrinter(debuglevel)
        grid_ids_for_any_project = array([], dtype=int32)
        grid_ids_by_project_type = {}
        for project_type in types:
            grid_ids_by_project_type[project_type] = array([], dtype=int32)
            if projects[project_type] is not None:
                grid_ids_by_project_type[project_type] = projects[
                    project_type].get_attribute(location_id_name)
            grid_ids_for_any_project = unique(
                concatenate((grid_ids_for_any_project,
                             grid_ids_by_project_type[project_type])))
        grid_ids_for_any_project = grid_ids_for_any_project[where(
            grid_ids_for_any_project > 0)]
        if not len(grid_ids_for_any_project): return

        result_data = {
            location_id_name:
            grid_ids_for_any_project,
            "scheduled_year": (year * ones(
                (grid_ids_for_any_project.size, ))).astype(int32)
        }
        for unit in units:
            result_data[unit] = zeros((grid_ids_for_any_project.size, ),
                                      dtype=int32)
        for project_type in types:
            result_data["%s_improvement_value" % project_type] = zeros(
                (grid_ids_for_any_project.size, ), dtype=int32)

        grid_idx = 0
        for grid_id in grid_ids_for_any_project:
            for i in range(0, len(types)):
                project_type = types[i]
                my_projects = projects[project_type]
                w = where(
                    my_projects.get_attribute(location_id_name) == grid_id)[0]
                if w.size > 0:
                    unit_variable = units[i]
                    result_data[unit_variable][grid_idx] = \
                        my_projects.get_attribute_by_index(
                            my_projects.get_attribute_name(), w).sum()
                    result_data["%s_improvement_value" % project_type][grid_idx] = \
                        my_projects.get_attribute_by_index(
                            "improvement_value", w).sum()
            grid_idx += 1

        storage = StorageFactory().get_storage('dict_storage')

        eventset_table_name = 'development_events_generated'
        storage.write_table(table_name=eventset_table_name,
                            table_data=result_data)

        eventset = DevelopmentEventDataset(
            in_storage=storage,
            in_table_name=eventset_table_name,
            id_name=[location_id_name, "scheduled_year"],
        )

        debug.print_debug(
            "Number of events: " + str(grid_ids_for_any_project.size), 3)
        return eventset