def run(self, config, show_output=False):
    """Cache the scenario database tables into the cache directory.

    Entries read from ``config``:
        cache_directory - where the cache is written
        scenario_database_configuration - server configuration; its
            ``database_name`` is used as the base scenario database
        creating_baseyear_cache_configuration - its ``tables_to_cache``
            attribute lists the requested tables
        base_year - used as the simulation's current time
        low_memory_run - optional; forwarded to the simulation state

    Every (database, tables) pair reported by the scenario database manager
    is handed to ``self.cache_database_tables``.  Requested tables that are
    not in ``self.tables_cached`` afterwards are reported as warnings.
    """
    cache_directory = config['cache_directory']
    logger.log_status("Caching large SQL tables to: " + cache_directory)
    self.show_output = show_output

    db_configuration = config['scenario_database_configuration']
    db_manager = ScenarioDatabaseManager(
        server_configuration=db_configuration,
        base_scenario_database_name=db_configuration.database_name,
    )
    self.database_server = DatabaseServer(db_configuration)
    tables_by_database = db_manager.get_database_to_table_mapping()

    baseyear_cache_configuration = config['creating_baseyear_cache_configuration']
    self.tables_to_cache = baseyear_cache_configuration.tables_to_cache

    # Configure the simulation state before any table is cached.
    state = SimulationState()
    if 'low_memory_run' in config:
        state.set_low_memory_run(config['low_memory_run'])
    state.set_cache_directory(cache_directory)
    state.set_current_time(config['base_year'])

    self.tables_cached = set()
    for db_name, db_tables in tables_by_database.items():
        self.cache_database_tables(config, db_name, db_tables)

    missing_tables = set(self.tables_to_cache) - self.tables_cached
    if not missing_tables:
        return
    logger.log_warning('The following requested tables were NOT cached:')
    for missing_table in missing_tables:
        logger.log_warning('\t%s' % missing_table)
Exemple #2
0
    def run(self, config, show_output=False):
        """Write the requested scenario database tables to the cache.

        ``config`` must provide 'cache_directory',
        'scenario_database_configuration',
        'creating_baseyear_cache_configuration' and 'base_year';
        'low_memory_run' is optional.  Each database reported by the
        scenario database manager is passed, with its tables, to
        ``self.cache_database_tables``.  A warning is logged for every
        requested table that is missing from ``self.tables_cached`` at the end.
        """
        target_directory = config['cache_directory']
        logger.log_status("Caching large SQL tables to: " + target_directory)
        self.show_output = show_output

        server_config = config['scenario_database_configuration']
        manager = ScenarioDatabaseManager(
            server_configuration=server_config,
            base_scenario_database_name=server_config.database_name)
        self.database_server = DatabaseServer(server_config)
        mapping = manager.get_database_to_table_mapping()

        cache_config = config['creating_baseyear_cache_configuration']
        self.tables_to_cache = cache_config.tables_to_cache

        sim_state = SimulationState()
        if 'low_memory_run' in config:
            sim_state.set_low_memory_run(config['low_memory_run'])
        sim_state.set_cache_directory(target_directory)
        sim_state.set_current_time(config['base_year'])

        self.tables_cached = set()
        for name_of_database, its_tables in mapping.items():
            self.cache_database_tables(config, name_of_database, its_tables)

        not_cached = set(self.tables_to_cache) - self.tables_cached
        if not_cached:
            logger.log_warning(
                'The following requested tables were NOT cached:')
        for skipped_table in not_cached:
            logger.log_warning('\t%s' % skipped_table)
Exemple #3
0
class ModelSystem(object):
    """
    Uses the information in configuration to run/estimate a set of models for given set of years.
    """

    def __init__(self):
        self.running = False
        self.forked_processes = []
        self.running_conditional = threading.Condition()

    def run(
        self,
        resources,
        write_datasets_to_cache_at_end_of_year=True,
        log_file_name="run_model_system.log",
        cleanup_datasets=True,
    ):
        """Run the configured models for every year in resources['years'].

        Entries in resources: (entries with no defaults are required)
               models - a list containing names of models to be run. Each name
                           must correspond to the name of the module/class of that model. Default: []
               models_in_year - a dictionary year -> list of models that overrides 'models'
                           for that year. Default: {}
               years - a tuple or list; the first and last elements are the start and end year
               debuglevel - an integer. The higher the more output will be printed. Default: 0
               seed - value passed to seed(). Default: NO_SEED
               expression_library - a dictionary.  The keys in the dictionary are pairs (dataset_name, variable_name)
               and the values are the corresponding expressions.  If present, it is set on VariableFactory.
               cache_directory, base_year, low_memory_mode, log, sample_input - optional settings
               that configure the simulation state and the logger.
        This method is called both to start up the simulation for all years, and also for each year
        when running with one process per year.  In the latter case, 'years' consists of just
        (current_year, current_year) rather than the real start and end years for the simulation.

        Raises TypeError if 'resources' is not a Resources object or 'years' is neither
        a tuple nor a list, and StandardError if 'years' has fewer than two elements.
        """
        if not isinstance(resources, Resources):
            raise TypeError("Argument 'resources' must be of type 'Resources'.")
        logger_settings = resources.get("log", {"tags": [], "verbosity_level": 3})
        logger.set_tags(logger_settings.get("tags", []))
        logger.set_verbosity_level(logger_settings.get("verbosity_level", 3))
        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        self.run_year_namespace = {}

        if resources.get("cache_directory", None) is not None:
            self.simulation_state.set_cache_directory(resources["cache_directory"])

        if "expression_library" in resources:
            VariableFactory().set_expression_library(resources["expression_library"])

        if resources.get("sample_input", False):
            self.update_config_for_multiple_runs(resources)

        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)

            with logger.block("Start simulation run"):
                models = resources.get("models", [])
                models_in_years = resources.get("models_in_year", {})

                resources.check_obligatory_keys(["years"])

                years = resources["years"]
                if not isinstance(years, (tuple, list)):
                    raise TypeError("Entry 'years' in resources must be a tuple.")

                if len(years) < 2:
                    # Report the offending value through the logger instead of a bare print.
                    logger.log_warning("Invalid entry 'years' in resources: %s" % str(years))
                    raise StandardError("Entry 'years' in resources must be of length at least 2.")

                start_year = years[0]
                end_year = years[-1]

                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get("seed", NO_SEED)

                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)

                for year in range(start_year, end_year + 1):
                    with logger.block("Starting simulation for year " + str(year)):
                        self.simulation_state.set_current_time(year)
                        SessionConfiguration().get_dataset_pool().remove_all_datasets()
                        # _run_year writes to its own per-year log file, so the
                        # run-level log file is switched off while it executes.
                        logger.disable_file_logging(log_file)
                        try:
                            models_to_run = models_in_years.get(year, None)
                            if models_to_run is None:
                                models_to_run = models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets,
                            )
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        collect()

        finally:
            logger.disable_file_logging(log_file)

    def flush_datasets(self, dataset_names, after_model=False):
        """Flush each named dataset that is present in the session's dataset pool.

        Names without a dataset in the pool are skipped.  'after_model' is
        passed through to flush_dataset.
        """
        pool = SessionConfiguration().get_dataset_pool()
        for name in dataset_names:
            if not pool.has_dataset(name):
                continue
            pooled_dataset = pool.get_dataset(name)
            self.flush_dataset(pooled_dataset, after_model=after_model)

    def flush_dataset(self, dataset, after_model=False):
        """Write the PRIMARY attributes of this dataset to the cache.

        Nothing happens when 'dataset' is falsy or not a Dataset.  With
        after_model=True the flush is skipped in two cases, both decided by
        comparing attribute counts with the length of dataset.get_id_name():
        the dataset has no more attributes than that, or all its attributes
        are known attributes and no more than that many are in memory (in
        which case computed attributes are still deleted).
        """
        if not (dataset and isinstance(dataset, Dataset)):
            return
        # Do not flush after model if not necessary
        if after_model and len(dataset.get_attribute_names()) <= len(dataset.get_id_name()):
            return
        if after_model:
            all_attributes_known = len(dataset.get_attribute_names()) == len(
                dataset.get_known_attribute_names()
            )
            if all_attributes_known and len(dataset.get_attributes_in_memory()) <= len(dataset.get_id_name()):
                dataset.delete_computed_attributes()
                return
        dataset.delete_computed_attributes()
        dataset.load_and_flush_dataset()

    def flush_datasets_after_model(self, resources):
        """Flush datasets once a model has finished.

        When resources['flush_variables'] is true, computed tables are deleted
        from the AttributeCache and every dataset in the pool is flushed;
        otherwise only those named in
        resources['datasets_to_cache_after_each_model'] are flushed.
        """
        if not resources.get("flush_variables", False):
            names_to_flush = resources.get("datasets_to_cache_after_each_model", [])
        else:
            AttributeCache().delete_computed_tables()
            # flushing below will also delete computed attributes
            pool = SessionConfiguration().get_dataset_pool()
            names_to_flush = pool.datasets_in_pool().keys()
        self.flush_datasets(names_to_flush, after_model=True)

    def _run_year(
        self,
        year,
        models,
        simulation_state,
        debuglevel,
        resources,
        write_datasets_to_cache_at_end_of_year,
        cleanup_datasets=True,
    ):
        """
        Run all entries of 'models' for a single simulation year.

        Assumes that all datasets resides in the cache directory in binary format.

        Steps, in order:
          * enable a per-year log file 'year_<year>_log.txt' in the cache directory;
          * obtain storages from AttributeCache for the previous year (or for the
            base year itself when year == base_year) and for the base year, and
            store them in self.vardict;
          * preload the datasets listed in resources['datasets_to_preload_in_year'][year]
            or, failing that, resources['datasets_to_preload'];
          * for every model entry (and every group member of it): preload its datasets,
            execute its 'import' configuration, create the model via do_init and, for
            each process, call do_prepare and do_process, then check the GUI command
            file and flush datasets;
          * if write_datasets_to_cache_at_end_of_year is true, flush every dataset in the pool;
          * if cleanup_datasets is true, delete the datasets held by SessionConfiguration.

        This method relies on 'exec' and locals(): datasets, imported names and
        model outputs are bound as local variables here and handed to
        do_init/do_prepare/do_process through locals().  Local variable names are
        therefore part of the behavior and must not be renamed casually.
        """
        # Optional debugger hook; any failure to import it is ignored.
        try:
            import wingdbstub
        except:
            pass
        self.vardict = {}
        log_file_name = os.path.join(simulation_state.get_cache_directory(), "year_%s_log.txt" % year)
        logger.enable_file_logging(log_file_name, "w")
        try:
            logger.start_block("Simulate year %s" % year)
            try:
                base_year = resources["base_year"]
                if year == base_year:
                    year_for_base_year_cache = year  # case of estimation
                else:
                    year_for_base_year_cache = year - 1
                cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
                self.vardict["cache_storage"] = cache_storage
                base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year)
                self.vardict["base_cache_storage"] = base_cache_storage
                simulation_state.set_flush_datasets(resources.get("flush_variables", False))
                SessionConfiguration()["simulation_year"] = year
                SessionConfiguration()["debuglevel"] = debuglevel
                # A year-specific preload list takes precedence over the general one.
                datasets_to_preload_in_year = resources.get("datasets_to_preload_in_year", {})
                if datasets_to_preload_in_year.get(year, None) is not None:
                    datasets_to_preload = datasets_to_preload_in_year[year]
                else:
                    datasets_to_preload = resources.get("datasets_to_preload", {})
                for dataset_name in datasets_to_preload:
                    SessionConfiguration().get_dataset_from_pool(dataset_name)
                models_configuration = resources.get("models_configuration", {})
                dataset_pool = SessionConfiguration().get_dataset_pool()
                datasets = {}
                for dataset_name, its_dataset in dataset_pool.datasets_in_pool().iteritems():
                    self.vardict[dataset_name] = its_dataset
                    datasets[dataset_name] = its_dataset
                    # Bind the dataset to a local variable named after it, so that it is
                    # part of locals() passed to do_init/do_prepare/do_process.
                    exec "%s=its_dataset" % dataset_name

                # This is needed. It resides in locals()
                # and is passed on to models as they run.
                ### TODO: There has got to be a better way!
                model_resources = Resources(datasets)
                n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(
                    models, models_configuration
                )
                self.run_year_namespace = locals()
                # ==========
                # Run the models.
                # ==========
                model_number = -1
                for model_entry in models:
                    # list 'models' can be in the form:
                    # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                    #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                    #                                      'commercial']}},
                    #  {'model_name_3': ['estimate', 'run']},
                    #  'model_name_4',
                    #  {'model_name_5': {'group_members': 'all'}}
                    # ]
                    # get list of methods to be processed, possibly for each group member
                    # NOTE(review): when 'value' is a dict (a model group), 'processes' is not
                    # assigned here; it is only set further below when the group has members.
                    # Otherwise it keeps the value from a previous iteration -- confirm intended.
                    if isinstance(model_entry, dict):
                        model_name, value = model_entry.items()[0]
                        if not isinstance(value, dict):  # is a model group
                            processes = value
                            if not isinstance(processes, list):
                                processes = [processes]
                    else:  # in the form 'model_name_4' in the comment above
                        model_name = model_entry
                        processes = ["run"]
                    group_member = None
                    model_group = model_group_members_to_run[model_name][1]
                    # Entries without group members are still run once.
                    last_member = max(1, len(model_group_members_to_run[model_name][0].keys()))
                    for imember in range(last_member):
                        controller_config = models_configuration[model_name]["controller"]
                        model_configuration = models_configuration[model_name]
                        if model_group_members_to_run[model_name][0].keys():
                            group_member_name = model_group_members_to_run[model_name][0].keys()[imember]
                            group_member = ModelGroupMember(model_group, group_member_name)
                            processes = model_group_members_to_run[model_name][0][group_member_name]
                            # A configuration named '<member>_<model>' overrides the model's own.
                            member_model_name = "%s_%s" % (group_member_name, model_name)
                            if member_model_name in models_configuration.keys():
                                model_configuration = models_configuration[member_model_name]
                                if "controller" in model_configuration.keys():
                                    controller_config = model_configuration["controller"]
                        datasets_to_preload_for_this_model = controller_config.get(
                            "_model_structure_dependencies_", {}
                        ).get("dataset", [])
                        for dataset_name in datasets_to_preload_for_this_model:
                            # Best effort: a dataset that cannot be loaded only produces a warning.
                            try:
                                if not dataset_pool.has_dataset(dataset_name) or (dataset_name not in datasets.keys()):
                                    ds = dataset_pool.get_dataset(dataset_name)
                                    self.vardict[dataset_name] = ds
                                    datasets[dataset_name] = ds
                                    exec "%s=ds" % dataset_name
                            except:
                                logger.log_warning("Failed to load dataset %s." % dataset_name)
                        # import part
                        # Each entry maps a module name to the name(s) to import from it;
                        # the imported names become locals of this method.
                        if "import" in controller_config.keys():
                            import_config = controller_config["import"]
                            for import_module in import_config.keys():
                                exec ("from %s import %s" % (import_module, import_config[import_module]))

                        # gui_import_replacements part
                        # This is a temporary hack -- replicates the functionality of the "import" section
                        # for use with the GUI.  The contents of this part of the config is a dictionary.
                        # Keys are names of models (not used here).  Values are 2 element pairs.
                        # The first element is a name and the second is a value.  Bind the name to the value.
                        # NOTE(review): the loop variable below is called 'model_name' and so rebinds
                        # the outer 'model_name' used by later group-member iterations -- confirm
                        # this is harmless.
                        if "gui_import_replacements" in controller_config.keys():
                            import_replacement_config = controller_config["gui_import_replacements"]
                            for model_name in import_replacement_config.keys():
                                pair = import_replacement_config[model_name]
                                temp = pair[1]
                                exec ("%s = temp") % pair[0]

                        # init part
                        model = self.do_init(locals())

                        # estimate and/or run part
                        for process in processes:
                            model_number = model_number + 1
                            # write status file
                            model.set_model_system_status_parameters(
                                year, n_models, model_number, resources.get("status_file_for_gui", None)
                            )
                            model.write_status_for_gui()
                            # prepare part
                            # do_prepare returns an assignment statement (or "") that binds
                            # the prepare output to a local variable when executed here.
                            exec (self.do_prepare(locals()))
                            processmodel_config = controller_config[process]
                            if "output" in processmodel_config.keys():
                                outputvar = processmodel_config["output"]
                            else:
                                outputvar = "process_output"
                            self.vardict[outputvar] = self.do_process(locals())
                            # Make the process output available as a local under its configured name.
                            exec outputvar + "=self.vardict[outputvar]"

                            # check command file from gui, if the simulation should be stopped or paused
                            self.do_commands_from_gui(resources.get("command_file_for_gui", None))

                            # capture namespace for interactive estimation
                            self.run_year_namespace = locals()
                            self.flush_datasets_after_model(resources)
                            del model
                            collect()

                # Write all datasets to cache.
                if write_datasets_to_cache_at_end_of_year:
                    logger.start_block("Writing datasets to cache for year %s" % year)
                    try:
                        for dataset_name, its_dataset in (
                            SessionConfiguration().get_dataset_pool().datasets_in_pool().iteritems()
                        ):
                            self.flush_dataset(its_dataset)
                    finally:
                        logger.end_block()

            finally:
                logger.end_block()
        finally:
            logger.disable_file_logging(log_file_name)

        if cleanup_datasets:
            SessionConfiguration().delete_datasets()

    def do_init(self, parent_state):
        """Run the 'init' part of this model's configuration.

        parent_state is the caller's locals() dictionary.  It must contain
        'controller_config' (with an 'init' entry holding at least 'name' and
        optionally 'arguments') and 'group_member'.

        The constructor call is built as a string and evaluated, so the class
        named in init_config['name'] and any names used in the argument
        expressions must be present in parent_state.

        Returns model object.
        """
        # give this method the same local variables as its calling method has.
        # (The names are injected with exec, so the local names in this method
        # are visible to the eval calls below and must not clash with them.)
        for key in parent_state.keys():
            if key <> "self":
                exec ('%s = parent_state["%s"]' % (key, key))
        init_config = parent_state["controller_config"]["init"]
        group_member = parent_state["group_member"]
        if group_member is None:  # No model group
            cmd = "%s(%s)" % (init_config["name"], self.construct_arguments_from_config(init_config))
            model = eval(cmd)
        else:  # Model belongs to a group
            # The group member is passed as the first positional argument.
            model = eval(
                "%s(group_member, %s)" % (init_config["name"], self.construct_arguments_from_config(init_config))
            )
        return model

    def do_prepare(self, parent_state):
        """Prepares for the current model in the parent state's context.
        What to do is determined by the contents of the current model's controller configuration.

        parent_state is the caller's locals() dictionary; this method uses its
        'process', 'controller_config' and 'model' entries.
        controller_config is the 'controller' part of the model configuration.
        If it has an entry 'prepare_for_<process>', the model method named there
        is called and its result is stored in self.vardict under the configured
        'output' name (default 'prepare_output').

        Returns a statement string for the caller to exec, which binds that
        result to a local variable of the same name; returns "" when there is
        nothing to prepare.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> "self":
                exec ('%s = parent_state["%s"]' % (key, key))
        # 'process', 'controller_config' and 'model' below come from parent_state
        # via the exec above.
        key_name = "prepare_for_%s" % process
        if key_name in controller_config.keys():
            prepare_config = controller_config[key_name]
            if "output" in prepare_config.keys():
                outputvar = prepare_config["output"]
            else:
                outputvar = "prepare_output"
            self.vardict[outputvar] = eval(
                "model.%s(%s)" % (prepare_config["name"], self.construct_arguments_from_config(prepare_config))
            )
            return '%s=self.vardict["%s"]' % (outputvar, outputvar)
        else:
            # do nothing when return value is exec'ed
            return ""

    def do_process(self, parent_state):
        """Call the current process method of the model and return its result.

        parent_state is the caller's locals() dictionary; this method uses its
        'model', 'process' and 'processmodel_config' entries.  The call
        'model.<process>(<arguments>)' is built as a string from
        processmodel_config and evaluated with the caller's variables in scope.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> "self":
                exec ('%s = parent_state["%s"]' % (key, key))
        ev = "model.%s(%s)" % (process, self.construct_arguments_from_config(processmodel_config))
        return eval(ev)

    def get_number_of_models_and_model_group_members_to_run(self, models, models_configuration):
        """Count the models in the list 'models' and collect their group members.

        Each group member and each process counts as one model.

        models - a list whose entries can have the forms
            [{'model_name_1': {'group_members': ['residential', 'commercial']}},
             {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                                                 'commercial']}},
             {'model_name_3': ['estimate', 'run']},
             'model_name_4',
             {'model_name_5': {'group_members': '_all_'}}
            ]
        models_configuration - dictionary of model configurations; only consulted for
            model groups, where the 'controller' entry may contain 'group_by_attribute'
            as a pair (dataset name, attribute name).

        Returns a tuple (number_of_models, model_group_members_to_run).  The second
        item maps each model name to a two-element list [members, model_group], where
        'members' maps group member names to their list of processes (empty for
        models without groups) and 'model_group' is a ModelGroup or None.
        NOTE(review): the count starts at 1, so it is one larger than the number of
        entries counted -- confirm that callers rely on this.

        Raises KeyError if a model group dictionary has no 'group_members' key, or if
        '_all_' is requested for a model without 'group_by_attribute'.
        """
        number_of_models = 1
        model_group_members_to_run = {}
        for model_entry in models:
            if not isinstance(model_entry, dict):
                # in the form 'model_name_4' in the docstring above
                model_group_members_to_run[model_entry] = [{}, None]
                number_of_models += 1
                continue

            # list(...) keeps this working where items() does not return a list.
            model_name, value = list(model_entry.items())[0]
            if not isinstance(value, dict):
                # in the form 'model_name_3' in the docstring above
                model_group_members_to_run[model_name] = [{}, None]
                if isinstance(value, list):
                    number_of_models += len(value)
                else:
                    number_of_models += 1
                continue

            # 'value' is a model group.  Test for the key itself rather than for
            # "the first key", which depends on dictionary ordering.
            if "group_members" not in value:
                raise KeyError("Key for model " + model_name + " must be 'group_members'.")
            group_members = value["group_members"]
            model_group = None
            controller_config = models_configuration[model_name]["controller"]
            if "group_by_attribute" in controller_config.keys():
                group_dataset_name, group_attribute = controller_config["group_by_attribute"]
                model_group = ModelGroup(
                    SessionConfiguration().get_dataset_from_pool(group_dataset_name), group_attribute
                )
            if not isinstance(group_members, list):
                group_members = [group_members]
            if group_members[0] == "_all_":  # see 'model_name_5' example above
                if model_group is None:
                    raise KeyError("Entry 'group_by_attribute' is missing for model %s" % model_name)
                group_members = model_group.get_member_names()

            members_to_run = {}
            model_group_members_to_run[model_name] = [members_to_run, model_group]
            for member in group_members:
                if isinstance(member, dict):
                    # see 'model_name_2' ('residential') in the docstring above
                    member_name = list(member.keys())[0]
                    member_processes = member[member_name]
                    if not isinstance(member_processes, list):
                        member_processes = [member_processes]
                else:  # see 'model_name_1'
                    member_name = member
                    member_processes = ["run"]
                members_to_run[member_name] = member_processes
                number_of_models += len(member_processes)
        return (number_of_models, model_group_members_to_run)

    def do_commands_from_gui(self, filename=None):
        """Obey the command found in the GUI command file, if there is one.

        Returns immediately when 'filename' is None or the file does not exist.
        Otherwise the file content (stripped of surrounding whitespace) is
        interpreted as a command:
            'stop'   - log a warning and exit the process via sys.exit()
            'resume' - return to the caller
            'pause'  - sleep 10 seconds and read the file again
        Any other content is reported as a warning and then treated like 'pause'.
        """
        if (filename is None) or not os.path.exists(filename):
            return
        while True:
            # 'with' closes the file even if reading fails.
            with open(filename) as command_file:
                line = command_file.read().strip()
            if line == "stop":
                logger.log_warning("Simulation stopped.")
                sys.exit()
            elif line == "resume":
                break
            elif line != "pause":
                logger.log_warning("Unknown command '%s'. Allowed commands: 'stop', 'pause', 'resume'." % line)
            time.sleep(10)

    def run_multiprocess(self, resources):
        """Run the simulation with one forked process per year.

        The cache directory is taken from resources['cache_directory'] or, when
        that is None, from SimulationState, and written back into the resources.
        Each year gets its own seed: either from resources['_seed_dictionary_']
        or drawn with randint after seeding with resources['seed'].  The loop
        over years stops at the first year whose process reports failure.
        """
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        cache_directory = resources["cache_directory"]
        if cache_directory is None:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is absolutely no good reason to be
        ###       changing the Configuration!
        resources["cache_directory"] = cache_directory

        log_file = os.path.join(cache_directory, "run_multiprocess.log")
        logger.enable_file_logging(log_file)

        first_year = resources["years"][0]
        last_year = resources["years"][-1]
        simulated_years = range(first_year, last_year + 1)
        root_seed = resources.get("seed", NO_SEED)
        if resources.get("_seed_dictionary_", None) is None:
            seed(root_seed)
            seed_array = randint(1, 2 ** 30, last_year - first_year + 1)
        else:
            # This is added by the RunManager to ensure reproducibility including restarted runs
            seed_dict = resources.get("_seed_dictionary_")
            seed_array = array([seed_dict[a_year] for a_year in simulated_years])
        logger.log_status("Running simulation for years %d thru %d" % (first_year, last_year))
        logger.log_status("Simulation root seed: %s" % root_seed)

        for position, a_year in enumerate(simulated_years):
            succeeded = self._run_each_year_as_separate_process(
                position,
                a_year,
                seed=seed_array[position],
                resources=resources,
                profiler_name=profiler_name,
                log_file=log_file,
            )
            if not succeeded:
                break

        self._notify_stopped()
        if profiler_name is not None:  # insert original value
            resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" % (first_year, last_year))

    # TODO: changing of configuration
    def _run_each_year_as_separate_process(
        self, iyear, year, seed=None, resources=None, profiler_name=None, log_file=None
    ):
        """Run the simulation of one year in a newly forked process.

        'resources' is modified in place: 'years' becomes (year, year), 'seed'
        becomes (seed,) and, when profiling, 'profile_filename' gets the year
        appended.  File logging to 'log_file' is switched off while the child
        process runs and is switched back on afterwards, also when forking
        raises an exception.

        Returns the value returned by _fork_new_process.
        """
        logger.start_block("Running simulation for year %d in new process" % year)
        success = False
        try:
            resources["years"] = (year, year)
            resources["seed"] = (seed,)

            if profiler_name is not None:
                # add year to the profile name
                resources["profile_filename"] = "%s_%s" % (profiler_name, year)

            optional_args = []
            if log_file:
                # only the file name is passed on to the child process
                optional_args += ["--log-file-name", os.path.split(log_file)[-1]]

            logger.disable_file_logging(log_file)
            try:
                success = self._fork_new_process(
                    "opus_core.model_coordinators.model_system", resources, optional_args=optional_args
                )
            finally:
                # Restore file logging even if the fork failed with an exception.
                logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()

        return success

    def run_in_one_process(
        self, resources, run_in_background=False, class_path="opus_core.model_coordinators.model_system"
    ):
        """Run the whole simulation in a single forked process.

        The cache directory is taken from resources['cache_directory'] or, when
        that is None, from SimulationState, and written back into the resources
        before the module 'class_path' is started via _fork_new_process.
        """
        resources = Resources(resources)
        cache_directory = resources["cache_directory"]
        if cache_directory is None:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources["cache_directory"] = cache_directory

        module_to_run = "%s" % class_path
        self._fork_new_process(
            module_to_run, resources, delete_temp_dir=False, run_in_background=run_in_background
        )
        self._notify_stopped()

    def run_in_same_process(self, resources, **kwargs):
        """Run the simulation in the current process via RunModelSystem.

        The cache directory is taken from resources['cache_directory'] or, when
        that is None, from SimulationState, and written back into the resources.
        Waiting threads are notified before and after the run.
        """
        resources = Resources(resources)
        directory = resources["cache_directory"]
        if directory is None:
            directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources["cache_directory"] = directory

        self._notify_started()
        RunModelSystem(model_system=self, resources=resources, **kwargs)
        self._notify_stopped()

    def construct_arguments_from_config(self, config):
        """Build the keyword-argument part of a call string from config['arguments'].

        Returns 'name=value, ' for every entry, concatenated in the dictionary's
        iteration order (including the trailing ', '), or "" when there is no
        'arguments' entry or it is empty.
        """
        if "arguments" not in config.keys():
            return ""
        arguments = config["arguments"]
        if len(arguments.keys()) <= 0:
            return ""
        pieces = ["%s=%s, " % (name, arguments[name]) for name in arguments.keys()]
        return "".join(pieces)

    def wait_for_start(self):
        self.running_conditional.acquire()
        while not self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_finish(self):
        self.running_conditional.acquire()
        while self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_process_or_finish(self, process_index):
        self.running_conditional.acquire()
        while process_index >= len(self.forked_processes) and self.running:
            self.running_conditional.wait()
        self.running_conditional.release()
        if not self.running:
            process_index = len(self.forked_processes) - 1
        return process_index

    def _fork_new_process(self, module_name, resources, run_in_background=False, **key_args):
        """Fork a new process running 'module_name' and register it.

        Marks the system as running, appends a new ForkProcess to
        self.forked_processes, starts it with the given resources and keyword
        arguments, and notifies threads waiting on the condition.  Unless
        'run_in_background' is true, waits for the process and cleans it up.

        Returns the value returned by ForkProcess.fork_new_process.
        """
        key_args["run_in_background"] = run_in_background
        # 'with' releases the condition's lock even if forking raises.
        with self.running_conditional:
            self.running = True
            process = ForkProcess()
            self.forked_processes.append(process)
            success = process.fork_new_process(module_name, resources, **key_args)
            self.running_conditional.notify_all()
        if not run_in_background:
            # Use the process created above rather than forked_processes[-1],
            # which may have changed once the lock is released.
            process.wait()
            process.cleanup()
        return success

    def _notify_started(self):
        self.running_conditional.acquire()
        self.running = True
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def _notify_stopped(self):
        self.running_conditional.acquire()
        self.running = False
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def update_config_for_multiple_runs(self, config):
        """Switch the listed models to variants with sampled coefficients.

        Modifies 'config' in place:
          * makes sure config['models_in_year'] has an entry for base_year + 1
            (defaulting to config.get('models'));
          * for every model in config['models_with_sampled_coefficients'], replaces
            its name in that entry by '<model>_sampled_coef' (when it is listed
            there) and adds a configuration under that name to
            config['models_configuration'], copied from the original model, with
            the 'prepare_for_run' arguments 'sample_coefficients',
            'distribution' and 'cache_storage' set.
        """
        models_to_update = config.get("models_with_sampled_coefficients", [])
        if "models_in_year" not in config.keys():
            config["models_in_year"] = {}
        first_year = config["base_year"] + 1
        if config["models_in_year"].get(first_year, None) is None:
            config["models_in_year"][first_year] = config.get("models")

        for umodel in models_to_update:
            # Computed before the lookup so that it is always defined and always
            # belongs to the current model, even when the model is not listed.
            new_model_name = "%s_sampled_coef" % umodel
            try:
                i = config["models_in_year"][first_year].index(umodel)
            except (ValueError, AttributeError):
                # The model is not in the list (or there is no list): nothing to rename.
                pass
            else:
                config["models_in_year"][first_year][i] = new_model_name
            config["models_configuration"][new_model_name] = Configuration(config["models_configuration"][umodel])
            arguments = config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"]
            arguments["sample_coefficients"] = True
            # The values below are source fragments: they are later pasted into a call string.
            arguments["distribution"] = "'normal'"
            arguments["cache_storage"] = "base_cache_storage"
class ModelSystem(object):
    """
    Uses the information in configuration to run/estimate a set of models for given set of years.
    """
    def __init__(self):
        self.running = False
        self.forked_processes = []
        self.running_conditional = threading.Condition()

    def run(self,
            resources,
            write_datasets_to_cache_at_end_of_year=True,
            log_file_name='run_model_system.log',
            cleanup_datasets=True):
        """Run the configured models for every year in resources['years'].

        Entries read from 'resources' (only 'years' is obligatory):
            years - a tuple or list; its first and last items are the start
                and end year.
            models - list of model entries to run. Default: []
            models_in_year - dictionary year -> list of model entries; takes
                precedence over 'models' for that year. Default: {}
            base_year - used as the start time of the simulation state. Default: 0
            debuglevel - an integer passed on to each year's run. Default: 0
            seed - value passed to seed(). Default: NO_SEED
            log - dictionary with 'tags' and 'verbosity_level' for the logger.
            low_memory_mode - passed to the simulation state. Default: False
            cache_directory - if not None, set on the simulation state.
            expression_library - if present, passed to
                VariableFactory().set_expression_library().
            sample_input - if true, update_config_for_multiple_runs() is
                applied to 'resources'.

        This method is called both to start up the simulation for all years,
        and also for each year when running with one process per year.  In the
        latter case, 'years' consists of just (current_year, current_year)
        rather than the real start and end years for the simulation.

        Raises TypeError if 'resources' is not a Resources object or 'years'
        is neither a tuple nor a list, and StandardError if 'years' has fewer
        than two items.
        """
        if not isinstance(resources, Resources):
            raise TypeError("Argument 'resources' must be of type 'Resources'.")

        log_settings = resources.get("log", {"tags": [], "verbosity_level": 3})
        logger.set_tags(log_settings.get("tags", []))
        logger.set_verbosity_level(log_settings.get("verbosity_level", 3))

        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(
            resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        self.run_year_namespace = {}

        configured_cache_directory = resources.get('cache_directory', None)
        if configured_cache_directory is not None:
            self.simulation_state.set_cache_directory(configured_cache_directory)

        if 'expression_library' in resources:
            VariableFactory().set_expression_library(
                resources['expression_library'])

        if resources.get('sample_input', False):
            self.update_config_for_multiple_runs(resources)

        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)

            with logger.block('Start simulation run'):
                default_models = resources.get("models", [])
                models_by_year = resources.get("models_in_year", {})

                resources.check_obligatory_keys(["years"])
                years = resources["years"]
                if not isinstance(years, (tuple, list)):
                    raise TypeError("Entry 'years' in resources must be a tuple.")
                if len(years) < 2:
                    print(years)
                    raise StandardError("Entry 'years' in resources must be of length at least 2.")
                first_year, last_year = years[0], years[-1]

                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get('seed', NO_SEED)
                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)

                for year in range(first_year, last_year + 1):
                    with logger.block("Starting simulation for year " + str(year)):
                        self.simulation_state.set_current_time(year)
                        SessionConfiguration().get_dataset_pool().remove_all_datasets()
                        # File logging to the run log is switched off while
                        # the year runs and switched back on afterwards.
                        logger.disable_file_logging(log_file)
                        try:
                            models_to_run = models_by_year.get(year, None)
                            if models_to_run is None:
                                models_to_run = default_models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets)
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        collect()
        finally:
            logger.disable_file_logging(log_file)

    def flush_datasets(self, dataset_names, after_model=False):
        """Flush each named dataset that is currently in the session's dataset pool.

        Names without a dataset in the pool are ignored.  'after_model' is
        passed through to flush_dataset().
        """
        pool = SessionConfiguration().get_dataset_pool()
        for name in dataset_names:
            if not pool.has_dataset(name):
                continue
            self.flush_dataset(pool.get_dataset(name), after_model=after_model)

    def flush_dataset(self, dataset, after_model=False):
        """Write the PRIMARY attributes of this dataset to the cache.

        Nothing is done unless 'dataset' is a truthy Dataset instance.
        When 'after_model' is true, the flush is skipped if the dataset has
        no more attributes than id names, or if all of its attributes are
        known attributes and no more of them are in memory than there are id
        names (in the latter case the computed attributes are still deleted).
        """
        if not (dataset and isinstance(dataset, Dataset)):
            return
        if after_model:
            # Do not flush after model if not necessary
            n_attributes = len(dataset.get_attribute_names())
            n_ids = len(dataset.get_id_name())
            if n_attributes <= n_ids:
                return
            only_known = n_attributes == len(dataset.get_known_attribute_names())
            if only_known and len(dataset.get_attributes_in_memory()) <= n_ids:
                dataset.delete_computed_attributes()
                return
        dataset.delete_computed_attributes()
        dataset.load_and_flush_dataset()

    def flush_datasets_after_model(self, resources):
        """Flush datasets once a model has finished.

        If resources['flush_variables'] is true, the computed tables of the
        attribute cache are deleted and every dataset in the pool is flushed;
        otherwise only the datasets named in
        resources['datasets_to_cache_after_each_model'] are flushed.
        """
        if not resources.get('flush_variables', False):
            names = resources.get("datasets_to_cache_after_each_model", [])
        else:
            AttributeCache().delete_computed_tables()
            pool = SessionConfiguration().get_dataset_pool()
            names = pool.datasets_in_pool().keys()
        self.flush_datasets(names, after_model=True)

    def _run_year(self,
                  year,
                  models,
                  simulation_state,
                  debuglevel,
                  resources,
                  write_datasets_to_cache_at_end_of_year,
                  cleanup_datasets=True):
        """
        Simulate one year by processing the entries of 'models' in order.

        Assumes that all datasets reside in the cache directory in binary format.

        Arguments:
            year - the year to simulate.
            models - list of model entries; the accepted forms are described
                in the comment at the top of the model loop below.
            simulation_state - provides the cache directory and receives the
                'flush_variables' setting.
            debuglevel - stored in SessionConfiguration()["debuglevel"].
            resources - configuration. Entries read here: 'base_year'
                (required), 'flush_variables', 'datasets_to_preload_in_year',
                'datasets_to_preload', 'models_configuration',
                'status_file_for_gui', 'command_file_for_gui'.
            write_datasets_to_cache_at_end_of_year - if true, every dataset in
                the pool is flushed after all models have been processed.
            cleanup_datasets - if true, SessionConfiguration().delete_datasets()
                is called at the end.

        Log output is written to 'year_<year>_log.txt' in the cache directory
        while the year is being processed.

        NOTE: the names of the local variables in this method are part of its
        contract: locals() is handed to do_init/do_prepare/do_process and is
        stored in self.run_year_namespace, so that argument strings from the
        model configuration can refer to them.
        """
        # Optional import (presumably a debugger stub); any failure is ignored.
        try:
            import wingdbstub
        except:
            pass
        # Outputs of the prepare/process steps and the datasets, by name.
        self.vardict = {}
        log_file_name = os.path.join(simulation_state.get_cache_directory(),
                                     "year_%s_log.txt" % year)
        logger.enable_file_logging(log_file_name, 'w')
        try:
            logger.start_block('Simulate year %s' % year)
            try:
                base_year = resources['base_year']
                if year == base_year:
                    year_for_base_year_cache = year  # case of estimation
                else:
                    year_for_base_year_cache = year - 1
                cache_storage = AttributeCache().get_flt_storage_for_year(
                    year_for_base_year_cache)
                self.vardict['cache_storage'] = cache_storage
                base_cache_storage = AttributeCache().get_flt_storage_for_year(
                    base_year)
                self.vardict['base_cache_storage'] = base_cache_storage
                simulation_state.set_flush_datasets(
                    resources.get("flush_variables", False))
                SessionConfiguration()["simulation_year"] = year
                SessionConfiguration()["debuglevel"] = debuglevel
                # A year-specific preload list takes precedence over the
                # general 'datasets_to_preload' entry.
                datasets_to_preload_in_year = resources.get(
                    'datasets_to_preload_in_year', {})
                if datasets_to_preload_in_year.get(year, None) is not None:
                    datasets_to_preload = datasets_to_preload_in_year[year]
                else:
                    datasets_to_preload = resources.get(
                        'datasets_to_preload', {})
                for dataset_name in datasets_to_preload:
                    SessionConfiguration().get_dataset_from_pool(dataset_name)
                models_configuration = resources.get('models_configuration',
                                                     {})
                dataset_pool = SessionConfiguration().get_dataset_pool()
                datasets = {}
                for dataset_name, its_dataset in dataset_pool.datasets_in_pool(
                ).iteritems():
                    self.vardict[dataset_name] = its_dataset
                    datasets[dataset_name] = its_dataset
                    # Bind the dataset to a local variable of the same name
                    # (Python 2 exec statement), so that it is part of locals().
                    exec '%s=its_dataset' % dataset_name

                # This is needed. It resides in locals()
                # and is passed on to models as they run.
                ### TODO: There has got to be a better way!
                model_resources = Resources(datasets)
                n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(
                    models, models_configuration)
                self.run_year_namespace = locals()
                #==========
                # Run the models.
                #==========
                model_number = -1
                for model_entry in models:
                    # list 'models' can be in the form:
                    # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                    #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                    #                                      'commercial']}},
                    #  {'model_name_3': ['estimate', 'run']},
                    #  'model_name_4',
                    #  {'model_name_5': {'group_members': 'all'}}
                    # ]
                    # get list of methods to be processed evtl. for each group member
                    if isinstance(model_entry, dict):
                        model_name, value = model_entry.items()[0]
                        # Not a model group: 'value' is the process or list of
                        # processes.  For a model group, 'processes' is set per
                        # group member further below.
                        if not isinstance(value, dict):
                            processes = value
                            if not isinstance(processes, list):
                                processes = [processes]
                    else:  # in the form 'model_name_4' in the comment above
                        model_name = model_entry
                        processes = ["run"]
                    group_member = None
                    model_group = model_group_members_to_run[model_name][1]
                    # Iterate once per group member, or exactly once for a
                    # model without group members.
                    last_member = max(
                        1,
                        len(model_group_members_to_run[model_name][0].keys()))
                    for imember in range(last_member):
                        controller_config = models_configuration[model_name][
                            "controller"]
                        model_configuration = models_configuration[model_name]
                        if model_group_members_to_run[model_name][0].keys():
                            group_member_name = model_group_members_to_run[
                                model_name][0].keys()[imember]
                            group_member = ModelGroupMember(
                                model_group, group_member_name)
                            processes = model_group_members_to_run[model_name][
                                0][group_member_name]
                            # A configuration named '<member>_<model>', if
                            # present, replaces the model's own configuration
                            # (and its controller, if it defines one).
                            member_model_name = "%s_%s" % (group_member_name,
                                                           model_name)
                            if member_model_name in models_configuration.keys(
                            ):
                                model_configuration = models_configuration[
                                    member_model_name]
                                if "controller" in model_configuration.keys():
                                    controller_config = model_configuration[
                                        "controller"]
                        # Load the datasets this model declares as structural
                        # dependencies; a failure only produces a warning.
                        datasets_to_preload_for_this_model = controller_config.get(
                            '_model_structure_dependencies_',
                            {}).get('dataset', [])
                        for dataset_name in datasets_to_preload_for_this_model:
                            try:
                                if not dataset_pool.has_dataset(
                                        dataset_name) or (
                                            dataset_name
                                            not in datasets.keys()):
                                    ds = dataset_pool.get_dataset(dataset_name)
                                    self.vardict[dataset_name] = ds
                                    datasets[dataset_name] = ds
                                    exec '%s=ds' % dataset_name
                            except:
                                logger.log_warning(
                                    'Failed to load dataset %s.' %
                                    dataset_name)
                        # import part
                        # Each entry maps a module name to the name(s) to
                        # import from it; the import lands in locals().
                        if "import" in controller_config.keys():
                            import_config = controller_config["import"]
                            for import_module in import_config.keys():
                                exec("from %s import %s" %
                                     (import_module,
                                      import_config[import_module]))

                        # gui_import_replacements part
                        # This is a temporary hack -- replicates the functionality of the "import" section
                        # for use with the GUI.  The contents of this part of the config is a dictionary.
                        # Keys are names of models (not used here).  Values are 2 element pairs.
                        # The first element is a name and the second is a value.  Bind the name to the value.
                        if "gui_import_replacements" in controller_config.keys(
                        ):
                            import_replacement_config = controller_config[
                                "gui_import_replacements"]
                            # NOTE(review): this loop rebinds 'model_name', which the
                            # enclosing loops keep using afterwards -- confirm that the
                            # keys always equal the current model name.
                            for model_name in import_replacement_config.keys():
                                pair = import_replacement_config[model_name]
                                temp = pair[1]
                                # Python 2 exec statement: the whole formatted
                                # string '<name> = temp' is executed.
                                exec("%s = temp") % pair[0]

                        # init part
                        model = self.do_init(locals())

                        # estimate and/or run part
                        for process in processes:
                            model_number = model_number + 1
                            # write status file
                            model.set_model_system_status_parameters(
                                year, n_models, model_number,
                                resources.get('status_file_for_gui', None))
                            model.write_status_for_gui()
                            # prepare part
                            # do_prepare returns an assignment statement (or '')
                            # that binds the prepare output to a local variable.
                            exec(self.do_prepare(locals()))
                            processmodel_config = controller_config[process]
                            if "output" in processmodel_config.keys():
                                outputvar = processmodel_config["output"]
                            else:
                                outputvar = "process_output"
                            self.vardict[outputvar] = self.do_process(locals())
                            exec outputvar + '=self.vardict[outputvar]'

                            # check command file from gui, if the simulation should be stopped or paused
                            self.do_commands_from_gui(
                                resources.get('command_file_for_gui', None))

                            # capture namespace for interactive estimation
                            self.run_year_namespace = locals()
                            self.flush_datasets_after_model(resources)
                            # NOTE(review): 'model' is deleted inside the process
                            # loop; with more than one process per model the next
                            # iteration appears to use the deleted name -- confirm.
                            del model
                            collect()

                # Write all datasets to cache.
                if write_datasets_to_cache_at_end_of_year:
                    logger.start_block(
                        'Writing datasets to cache for year %s' % year)
                    try:
                        for dataset_name, its_dataset in SessionConfiguration(
                        ).get_dataset_pool().datasets_in_pool().iteritems():
                            self.flush_dataset(its_dataset)
                    finally:
                        logger.end_block()

            finally:
                logger.end_block()
        finally:
            logger.disable_file_logging(log_file_name)

        if cleanup_datasets:
            SessionConfiguration().delete_datasets()

    def do_init(self, parent_state):
        """Run the 'init' part of this model's configuration.

        parent_state is the caller's namespace (a dictionary, e.g. its
        locals()).  It must contain 'controller_config', whose "init" entry
        gives the "name" of the callable to invoke and optionally its
        "arguments", and 'group_member' (None for a model without group).
        For a group model the group member is passed as the first positional
        argument.

        Returns model object.
        """
        # Evaluate in a copy of the caller's namespace rather than exec-ing
        # every entry into this frame: entries called e.g. 'key' or
        # 'parent_state' can then no longer clobber this method's own
        # variables, and no exec statement is needed.
        namespace = dict(parent_state)
        namespace['self'] = self
        init_config = parent_state['controller_config']["init"]
        group_member = parent_state['group_member']
        namespace['group_member'] = group_member
        arguments = self.construct_arguments_from_config(init_config)
        if group_member is None:  # No model group
            cmd = "%s(%s)" % (init_config["name"], arguments)
        else:  # Model belongs to a group
            cmd = "%s(group_member, %s)" % (init_config["name"], arguments)
        # NOTE: the configuration strings are evaluated as Python code, so the
        # configuration must come from a trusted source.
        return eval(cmd, globals(), namespace)

    def do_prepare(self, parent_state):
        """Prepares for the current model in the parent state's context.

        parent_state is the caller's namespace (a dictionary, e.g. its
        locals()).  It must contain 'process', 'controller_config' and, when a
        prepare step is configured, 'model'.

        If controller_config has an entry 'prepare_for_<process>', the model
        method named there is called with the configured arguments and its
        result is stored in self.vardict under the configured "output" name
        (default 'prepare_output').

        Returns a statement string of the form '<output>=self.vardict["<output>"]'
        for the caller to exec, or '' if there is nothing to prepare.
        """
        # Evaluate in a copy of the caller's namespace rather than exec-ing
        # every entry into this frame (which lets entries clobber this
        # method's own variables and requires the exec statement).
        namespace = dict(parent_state)
        namespace['self'] = self
        controller_config = parent_state['controller_config']
        key_name = "prepare_for_%s" % parent_state['process']
        if key_name not in controller_config.keys():
            # do nothing when return value is exec'ed
            return ''
        prepare_config = controller_config[key_name]
        if "output" in prepare_config.keys():
            outputvar = prepare_config["output"]
        else:
            outputvar = "prepare_output"
        cmd = "model.%s(%s)" % (
            prepare_config["name"],
            self.construct_arguments_from_config(prepare_config))
        # NOTE: the configuration strings are evaluated as Python code, so the
        # configuration must come from a trusted source.
        self.vardict[outputvar] = eval(cmd, globals(), namespace)
        return '%s=self.vardict["%s"]' % (outputvar, outputvar)

    def do_process(self, parent_state):
        """Call the current process method of the model and return its result.

        parent_state is the caller's namespace (a dictionary, e.g. its
        locals()).  It must contain 'model', 'process' (the name of the model
        method to call) and 'processmodel_config' (the configuration from
        which the call arguments are constructed).
        """
        # Evaluate in a copy of the caller's namespace rather than exec-ing
        # every entry into this frame (which lets entries clobber this
        # method's own variables and requires the exec statement).
        namespace = dict(parent_state)
        namespace['self'] = self
        ev = "model.%s(%s)" % (
            parent_state['process'],
            self.construct_arguments_from_config(
                parent_state['processmodel_config']))
        # NOTE: the configuration strings are evaluated as Python code, so the
        # configuration must come from a trusted source.
        return eval(ev, globals(), namespace)

    def get_number_of_models_and_model_group_members_to_run(
            self, models, models_configuration):
        """Count the models in 'models' and collect the group members to run.

        Each group member and each process counts as one model; the count
        starts at 1.  Entries of 'models' can have these forms:
          [{'model_name_1': {'group_members': ['residential', 'commercial']}},
           {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                                               'commercial']}},
           {'model_name_3': ['estimate', 'run']},
           'model_name_4',
           {'model_name_5': {'group_members': '_all_'}}
          ]

        Returns a tuple (count, members), where members maps each model name
        to a two-element list [processes_by_member, model_group]:
        processes_by_member maps group member names to lists of processes
        (empty for models without group) and model_group is a ModelGroup or
        None.

        Raises KeyError if a group entry does not start with the key
        'group_members', or if '_all_' is requested for a model without
        'group_by_attribute' in its controller configuration.
        """
        count = 1
        members_to_run = {}
        for entry in models:
            if not isinstance(entry, dict):
                # in the form 'model_name_4' in the docstring
                members_to_run[entry] = [{}, None]
                count += 1
                continue

            name, spec = list(entry.items())[0]
            if not isinstance(spec, dict):
                # in the form 'model_name_3' in the docstring
                members_to_run[name] = [{}, None]
                if isinstance(spec, list):
                    count += len(spec)
                else:
                    count += 1
                continue

            # 'spec' describes a model group
            if list(spec.keys())[0] != "group_members":
                raise KeyError("Key for model " + name + " must be 'group_members'.")
            group_members = spec["group_members"]
            group = None
            controller = models_configuration[name]["controller"]
            if 'group_by_attribute' in controller.keys():
                group_dataset_name, group_attribute = controller['group_by_attribute']
                group = ModelGroup(
                    SessionConfiguration().get_dataset_from_pool(group_dataset_name),
                    group_attribute)
            if not isinstance(group_members, list):
                group_members = [group_members]
            if group_members[0] == "_all_":  # see 'model_name_5' in the docstring
                if group is None:
                    raise KeyError("Entry 'group_by_attribute' is missing for model %s" % name)
                group_members = group.get_member_names()

            processes_by_member = {}
            members_to_run[name] = [processes_by_member, group]
            for member in group_members:
                if isinstance(member, dict):
                    # see 'model_name_2' ('residential') in the docstring
                    member_name = list(member.keys())[0]
                    processes = member[member_name]
                    if not isinstance(processes, list):
                        processes = [processes]
                else:  # see 'model_name_1'
                    member_name = member
                    processes = ["run"]
                processes_by_member[member_name] = processes
                count += len(processes)
        return (count, members_to_run)

    def do_commands_from_gui(self, filename=None):
        """Obey the command found in the given command file, if there is one.

        Returns immediately if 'filename' is None or the file does not exist.
        Otherwise the stripped file content is interpreted as a command:
            'stop'   - log a warning and exit via sys.exit().
            'resume' - return to the caller.
            'pause'  - read the file again after 10 seconds.
        Any other content is reported with a warning and then treated like
        'pause'.
        """
        if (filename is None) or not os.path.exists(filename):
            return
        while True:
            # The with-block closes the file even if reading fails.
            with open(filename) as f:
                line = f.read().strip()
            if line == 'stop':
                logger.log_warning('Simulation stopped.')
                sys.exit()
            elif line == 'resume':
                break
            elif line != 'pause':
                logger.log_warning(
                    "Unknown command '%s'. Allowed commands: 'stop', 'pause', 'resume'."
                    % line)
            time.sleep(10)

    def run_multiprocess(self, resources):
        """Run the simulation with one forked process per year.

        'resources' is copied into a Resources object.  Entries read:
        'cache_directory' (if None, the one of SimulationState() is used),
        'years' (first and last item give the year range), 'seed',
        '_seed_dictionary_' (optional mapping year -> seed) and
        'profile_filename'.

        The loop over the years stops at the first year whose process reports
        no success.  Waiting threads are notified that the run has stopped,
        and 'profile_filename' is restored, even if a year raises.
        """
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        cache_directory = resources['cache_directory']
        if cache_directory is None:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is absolutely no good reason to be
        ###       changing the Configuration!
        resources['cache_directory'] = cache_directory

        log_file = os.path.join(cache_directory, 'run_multiprocess.log')
        logger.enable_file_logging(log_file)

        start_year = resources["years"][0]
        end_year = resources["years"][-1]
        nyears = end_year - start_year + 1
        years = range(start_year, end_year + 1)
        root_seed = resources.get("seed", NO_SEED)
        seed_dict = resources.get('_seed_dictionary_', None)
        if seed_dict is not None:
            # This is added by the RunManager to ensure reproducibility including restarted runs
            seed_array = array([seed_dict[year] for year in years])
        else:
            # Derive one seed per year from the root seed.
            seed(root_seed)
            seed_array = randint(1, 2**30, nyears)
        logger.log_status("Running simulation for years %d thru %d" %
                          (start_year, end_year))
        logger.log_status("Simulation root seed: %s" % root_seed)

        try:
            for iyear, year in enumerate(years):
                success = self._run_each_year_as_separate_process(
                    iyear,
                    year,
                    seed=seed_array[iyear],
                    resources=resources,
                    profiler_name=profiler_name,
                    log_file=log_file)
                if not success:
                    break
        finally:
            # Always clear the running flag; otherwise threads blocked in
            # wait_for_finish() would never wake up after a failure.
            self._notify_stopped()
            if profiler_name is not None:  # insert original value
                resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" %
                          (start_year, end_year))

    #TODO: changing of configuration
    def _run_each_year_as_separate_process(self,
                                           iyear,
                                           year,
                                           seed=None,
                                           resources=None,
                                           profiler_name=None,
                                           log_file=None):
        """Run the model system for a single year in a newly forked process.

        'resources' is modified in place: 'years' becomes (year, year),
        'seed' becomes a one-element tuple holding 'seed', and, if
        'profiler_name' is given, 'profile_filename' becomes
        '<profiler_name>_<year>'.  'iyear' is not used here.

        File logging to 'log_file' is disabled while the process is forked and
        re-enabled afterwards, also when forking raises.

        Returns the value returned by _fork_new_process().
        """
        logger.start_block('Running simulation for year %d in new process' %
                           year)
        resources['years'] = (year, year)
        # NOTE(review): the seed is stored as a one-element tuple (the original
        # statement ended in a trailing comma); kept as is -- confirm intent.
        resources['seed'] = (seed,)

        if profiler_name is not None:
            # add year to the profile name
            resources["profile_filename"] = "%s_%s" % (profiler_name, year)

        optional_args = []
        if log_file:
            optional_args += ['--log-file-name', os.path.split(log_file)[-1]]

        success = False
        try:
            logger.disable_file_logging(log_file)
            try:
                success = self._fork_new_process(
                    'opus_core.model_coordinators.model_system',
                    resources,
                    optional_args=optional_args)
            finally:
                # Restore file logging even if forking raised, as run() does.
                logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()

        return success

    def run_in_one_process(
            self,
            resources,
            run_in_background=False,
            class_path='opus_core.model_coordinators.model_system'):
        """Run the module given by 'class_path' in one forked process.

        'resources' is copied into a Resources object whose 'cache_directory'
        entry falls back to the cache directory of SimulationState() when it
        is None.  The process is forked with delete_temp_dir=False; afterwards
        waiting threads are notified that the run has stopped.
        """
        resources = Resources(resources)
        cache_directory = resources['cache_directory']
        if cache_directory is None:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources['cache_directory'] = cache_directory

        module_name = '%s' % class_path
        self._fork_new_process(module_name,
                               resources,
                               delete_temp_dir=False,
                               run_in_background=run_in_background)
        self._notify_stopped()

    def run_in_same_process(self, resources, **kwargs):
        """Run the model system in the current process via RunModelSystem.

        'resources' is copied into a Resources object whose 'cache_directory'
        entry falls back to the cache directory of SimulationState() when it
        is None.  Additional keyword arguments are passed on to
        RunModelSystem.

        Waiting threads are notified when the run starts and when it stops;
        the stop notification is also sent if the run raises.
        """
        resources = Resources(resources)
        cache_directory = resources['cache_directory']
        if cache_directory is None:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources['cache_directory'] = cache_directory

        self._notify_started()
        try:
            RunModelSystem(model_system=self, resources=resources, **kwargs)
        finally:
            # Clear the running flag even on failure, so that threads blocked
            # in wait_for_finish() are released.
            self._notify_stopped()

    def construct_arguments_from_config(self, config):
        """Build the source text of a keyword-argument list from config["arguments"].

        Every entry contributes '<name>=<value>, ' (note the trailing comma
        and space).  Returns '' if there is no "arguments" entry or it is
        empty.
        """
        if "arguments" not in config.keys():
            return ""
        arg_dict = config["arguments"]
        if len(arg_dict.keys()) <= 0:
            return ""
        return "".join(
            "%s=%s, " % (name, arg_dict[name]) for name in arg_dict.keys())

    def wait_for_start(self):
        """Block until the 'running' flag is set.

        The condition is used as a context manager so that its lock is
        released even if the wait is interrupted by an exception.
        """
        with self.running_conditional:
            while not self.running:
                self.running_conditional.wait()

    def wait_for_finish(self):
        """Block until the 'running' flag is cleared.

        The condition is used as a context manager so that its lock is
        released even if the wait is interrupted by an exception.
        """
        with self.running_conditional:
            while self.running:
                self.running_conditional.wait()

    def wait_for_process_or_finish(self, process_index):
        """Block until forked process number 'process_index' exists or the run has stopped.

        Returns 'process_index' if the run is still marked as running,
        otherwise the index of the last forked process (-1 if there is none).

        The condition is used as a context manager so that its lock is
        released even if the wait is interrupted by an exception.
        """
        with self.running_conditional:
            while process_index >= len(self.forked_processes) and self.running:
                self.running_conditional.wait()
        if not self.running:
            process_index = len(self.forked_processes) - 1
        return process_index

    def _fork_new_process(self,
                          module_name,
                          resources,
                          run_in_background=False,
                          **key_args):
        """Fork a new process for 'module_name' and register it in self.forked_processes.

        'run_in_background' and any further keyword arguments are passed on to
        ForkProcess.fork_new_process().  Unless 'run_in_background' is true,
        this method waits for the process and cleans it up before returning.

        Sets the 'running' flag and notifies waiting threads once the process
        has been forked.  Returns the value of ForkProcess.fork_new_process().
        """
        # The with-block releases the condition's lock even if forking raises;
        # otherwise every thread using the condition would block forever.
        with self.running_conditional:
            self.running = True
            process = ForkProcess()
            self.forked_processes.append(process)
            key_args["run_in_background"] = run_in_background
            success = process.fork_new_process(
                module_name, resources, **key_args)
            self.running_conditional.notifyAll()
        if not run_in_background:
            # Use the local reference rather than forked_processes[-1], which
            # may point to another process once the lock has been released.
            process.wait()
            process.cleanup()
        return success

    def _notify_started(self):
        self.running_conditional.acquire()
        self.running = True
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def _notify_stopped(self):
        self.running_conditional.acquire()
        self.running = False
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def update_config_for_multiple_runs(self, config):
        """Switch the models listed in 'models_with_sampled_coefficients' to sampled coefficients.

        'config' is modified in place:
          - config['models_in_year'][base_year + 1] is created if missing
            (falling back to config.get('models')).
          - Each listed model found in that list is replaced by
            '<model>_sampled_coef'.
          - A copy of the model's entry in config["models_configuration"]
            (made via Configuration) is stored under the new name, with the
            'prepare_for_run' arguments 'sample_coefficients', 'distribution'
            and 'cache_storage' set.

        Models that do not appear in the list for base_year + 1 are skipped.
        """
        models_to_update = config.get('models_with_sampled_coefficients', [])
        if 'models_in_year' not in config.keys():
            config['models_in_year'] = {}
        first_year = config['base_year'] + 1
        if config['models_in_year'].get(first_year, None) is None:
            config['models_in_year'][first_year] = config.get('models')
        year_models = config['models_in_year'][first_year]

        for umodel in models_to_update:
            try:
                i = year_models.index(umodel)
            except (ValueError, AttributeError):
                # The model is not scheduled for that year (or there is no
                # model list at all), so there is nothing to replace.  The
                # previous bare 'except: pass' went on to use a
                # 'new_model_name' that was undefined or left over from the
                # preceding iteration.
                continue
            new_model_name = '%s_sampled_coef' % umodel
            year_models[i] = new_model_name
            config["models_configuration"][new_model_name] = Configuration(
                config["models_configuration"][umodel])
            arguments = config["models_configuration"][new_model_name][
                "controller"]["prepare_for_run"]["arguments"]
            arguments["sample_coefficients"] = True
            # Argument values are source strings, hence the nested quotes.
            arguments["distribution"] = "'normal'"
            arguments["cache_storage"] = "base_cache_storage"