Example no. 1
0
    def _run_each_year_as_separate_process(self,
                                           iyear,
                                           year,
                                           seed=None,
                                           resources=None,
                                           profiler_name=None,
                                           log_file=None):

        logger.start_block('Running simulation for year %d in new process' %
                           year)
        resources['years'] = (year, year)
        resources['seed'] = (seed,)

        if profiler_name is not None:
            # add year to the profile name
            resources["profile_filename"] = "%s_%s" % (profiler_name, year)

        optional_args = []
        if log_file:
            optional_args += ['--log-file-name', os.path.split(log_file)[-1]]

        success = False
        try:
            logger.disable_file_logging(log_file)
            success = self._fork_new_process(
                'opus_core.model_coordinators.model_system',
                resources,
                optional_args=optional_args)
            logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()

        return success
 def create_indicators(self, indicators,
                       file_name_for_indicator_results = 'indicator_results.html',
                       display_error_box = False, 
                       show_results = False):
     '''Handles the computation of a list of indicators.'''
     if (len(indicators) == 0):
         return
     
     source_data = indicators[0].source_data
     
         
     log_file_path = os.path.join(source_data.cache_directory, 'indicators.log')
     logger.enable_file_logging(log_file_path, 'a')
     logger.log_status('\n%s BEGIN %s %s' 
         % ('='*29, strftime('%Y_%m_%d_%H_%M', localtime()), '='*29))
 
     for indicator in indicators:
         indicator.create(display_error_box = display_error_box)
     
     logger.log_status('%s END %s %s\n' 
         % ('='*30, strftime('%Y_%m_%d_%H_%M', localtime()), '='*30))
     logger.disable_file_logging(log_file_path)
 
     results_page_path = self._write_results(indicators, 
                                             source_data, 
                                             file_name_for_indicator_results,
                                             display_error_box)
     
     if show_results:
         self.show_results(results_page_path)
         
     return results_page_path
    def _run_each_year_as_separate_process(self, start_year, end_year, seed_array, resources, log_file_name='run_multiprocess.log'):
        skip_first_year_of_urbansim = resources.get('skip_urbansim', False)
        log_file = os.path.join(resources['cache_directory'], log_file_name)
        profiler_name = resources.get("profile_filename", None)
        iyear = 0
        for year in range(start_year, end_year+1):
            if (year != start_year) or ((year == start_year) and (not skip_first_year_of_urbansim)):
                logger.start_block('Running UrbanSim for year %d in new process' % year)
                try:
                    resources['years'] = (year, year)
                    resources['seed'] = (seed_array[iyear],)
                    logger.disable_file_logging(log_file)
                    if profiler_name is not None:
                        resources["profile_filename"] = "%s_%s" % (profiler_name, year) # add year to the profile name
                    self._fork_new_process(
                        'urbansim.model_coordinators.model_system', resources, optional_args=['--log-file-name', log_file_name])
                    logger.enable_file_logging(log_file, verbose=False)
                finally:
                    logger.end_block()
                    
            if ('travel_model_configuration' in resources) and (not resources.get('skip_travel_model', False)):
                # tnicolai add start year to travel model config
                tmc = resources['travel_model_configuration']
                tmc['start_year'] = start_year # end tnicolai
                self._run_travel_models_in_separate_processes(resources['travel_model_configuration'], year, resources)

            if 'post_year_configuration' in resources:
                self._run_travel_models_in_separate_processes(resources['post_year_configuration'], year, resources)
            iyear +=1
        self._notify_stopped()
Example no. 4
0
 def _run_each_year_as_separate_process(
         self,
         start_year,
         end_year,
         seed_array,
         resources,
         log_file_name='run_multiprocess.log'):
     log_file = os.path.join(resources['cache_directory'], log_file_name)
     profiler_name = resources.get("profile_filename", None)
     iyear = 0
     for year in range(start_year, end_year + 1):
         logger.start_block(
             'Running simulation for year %d in new process' % year)
         try:
             resources['years'] = (year, year)
             resources['seed'] = (seed_array[iyear],)
             logger.disable_file_logging(log_file)
             if profiler_name is not None:
                 resources["profile_filename"] = "%s_%s" % (
                     profiler_name, year)  # add year to the profile name
             self._fork_new_process(
                 'opus_core.model_coordinators.model_system',
                 resources,
                 optional_args=['--log-file-name', log_file_name])
             logger.enable_file_logging(log_file, verbose=False)
         finally:
             logger.end_block()
         iyear += 1
     self._notify_stopped()
    def create_indicators(
        self,
        indicators,
        file_name_for_indicator_results="indicator_results.html",
        display_error_box=False,
        show_results=False,
    ):
        """Handles the computation of a list of indicators."""
        if len(indicators) == 0:
            return

        source_data = indicators[0].source_data

        log_file_path = os.path.join(source_data.cache_directory, "indicators.log")
        logger.enable_file_logging(log_file_path, "a")
        logger.log_status("\n%s BEGIN %s %s" % ("=" * 29, strftime("%Y_%m_%d_%H_%M", localtime()), "=" * 29))

        for indicator in indicators:
            indicator.create(display_error_box=display_error_box)

        logger.log_status("%s END %s %s\n" % ("=" * 30, strftime("%Y_%m_%d_%H_%M", localtime()), "=" * 30))
        logger.disable_file_logging(log_file_path)

        results_page_path = self._write_results(
            indicators, source_data, file_name_for_indicator_results, display_error_box
        )

        if show_results:
            self.show_results(results_page_path)

        return results_page_path
Example no. 6
0
    def log_results(self):
        procedure = self.model_system.run_year_namespace["model"].procedure        
        if not hasattr(procedure, 'print_results'):
            logger.log_warning("Estimation procedure %s doesn't have a print_results() method, "  % procedure + \
                               "which is needed to log estimation results.")
            return

        tmp_config = Resources(self.config)
        outputvar = tmp_config['models_configuration'][self.model_name]['controller']['estimate']['arguments']['output']
        results = self.model_system.vardict.get(outputvar, "process_output")[1]
        
        storage_location = AttributeCache().get_storage_location()
        log_file_name = "estimate_models.log" ## one file for all estimation results
        logger.enable_file_logging( os.path.join(storage_location, log_file_name),
                                    mode = 'a')  ##appending instead of overwriting
        logger.start_block("%s Estimation Results" % self.model_name)        
        for submodel, submodel_results in results.items():
            logger.log_status( "Submodel %s" % submodel)
            if submodel_results == {}:
                logger.log_warning("No estimation results for submodel %s" % submodel)
            else:
                try:
                    procedure.print_results(submodel_results)
                except:
                    logger.log_warning("Problems in printing results for submodel %s" % submodel) 
        logger.end_block()
        logger.disable_file_logging()        
 def create_indicators(self, indicators,
                       file_name_for_indicator_results = 'indicator_results.html',
                       display_error_box = False, 
                       show_results = False):
     '''Handles the computation of a list of indicators.'''
     if (len(indicators) == 0):
         return
     
     source_data = indicators[0].source_data
     
         
     log_file_path = os.path.join(source_data.cache_directory, 'indicators.log')
     logger.enable_file_logging(log_file_path, 'a')
     logger.log_status('\n%s BEGIN %s %s' 
         % ('='*29, strftime('%Y_%m_%d_%H_%M', localtime()), '='*29))
 
     for indicator in indicators:
         indicator.create(display_error_box = display_error_box)
     
     logger.log_status('%s END %s %s\n' 
         % ('='*30, strftime('%Y_%m_%d_%H_%M', localtime()), '='*30))
     logger.disable_file_logging(log_file_path)
 
     results_page_path = self._write_results(indicators, 
                                             source_data, 
                                             file_name_for_indicator_results,
                                             display_error_box)
     
     if show_results:
         self.show_results(results_page_path)
         
     return results_page_path
Example no. 8
0
    def _run_each_year_as_separate_process(
        self, iyear, year, seed=None, resources=None, profiler_name=None, log_file=None
    ):

        logger.start_block("Running simulation for year %d in new process" % year)
        resources["years"] = (year, year)
        resources["seed"] = (seed,)

        if profiler_name is not None:
            # add year to the profile name
            resources["profile_filename"] = "%s_%s" % (profiler_name, year)

        optional_args = []
        if log_file:
            optional_args += ["--log-file-name", os.path.split(log_file)[-1]]

        success = False
        try:
            logger.disable_file_logging(log_file)
            success = self._fork_new_process(
                "opus_core.model_coordinators.model_system", resources, optional_args=optional_args
            )
            logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()

        return success
Example no. 9
0
    def __init__(self,
                 xml_config,
                 scenario,
                 calib_datasets,
                 target_expression,
                 target_file,
                 subset=None,
                 subset_patterns=None,
                 skip_cache_cleanup=False,
                 log_directory=None):
        """
        - xml_config: xml configuration file, for ex '/home/atschirhar/opus/project_configs/paris_zone.xml'
        - scenario: name of scenario to run for calibration, where models_to_run and simulation years are specified
        - calib_datasets: dictionary specifying dataset names and attributes to be calibrated, e.g.
                  {'establishment_location_choice_model_coefficients': 'estimate'}
        - target_expression: opus expression computing values from prediction to be compared with targets 
        - target_file: name of csv file providing targets 
        - subset: dictionary specifying the dataset to be calibrated,
                  {'establishment_location_choice_model_coefficients': ['coefficient_name', ['paris_celcm', 'biotech_celcm']]}
          subset and subset_patterns cannot both be specified for the same dataset
        - subset_patterns: dictionary specifying the dataset to be calibrated through a regular expression (re) pattern
                  {'establishment_location_choice_model_coefficients': ['coefficient_name', '*_celcm']}
          subset and subset_patterns cannot both be specified for the same dataset

        """
        self.target_expression = target_expression
        self.target = self.read_target(target_file)

        self.run_manager = None
        self.xml_config = xml_config
        self.scenario = scenario
        self.skip_cache_cleanup = skip_cache_cleanup
        self.run_id, self.cache_directory = self.init_run()
        self.run_ids = [
            self.run_id
        ]  #allow starting of multiple runs for parallel optimization
        self.log_directory = log_directory
        if self.log_directory is None:
            self.log_directory = self.cache_directory  #legacy

        log_file = os.path.join(self.log_directory, "calibration.log")
        logger.enable_file_logging(log_file)

        dict_config = XMLConfiguration(self.xml_config).get_run_configuration(
            self.scenario)
        ## get parameters from config
        self.base_year = dict_config['base_year']
        self.start_year, self.end_year = dict_config['years']
        self.project_name = dict_config['project_name']
        self.package_order = dict_config[
            'dataset_pool_configuration'].package_order
    def __init__(
        self,
        xml_config,
        scenario,
        calib_datasets,
        target_expression,
        target_file,
        subset=None,
        subset_patterns=None,
        skip_cache_cleanup=False,
        log_directory=None,
    ):
        """
        - xml_config: xml configuration file, for ex '/home/atschirhar/opus/project_configs/paris_zone.xml'
        - scenario: name of scenario to run for calibration, where models_to_run and simulation years are specified
        - calib_datasets: dictionary specifying dataset names and attributes to be calibrated, e.g.
                  {'establishment_location_choice_model_coefficients': 'estimate'}
        - target_expression: opus expression computing values from prediction to be compared with targets 
        - target_file: name of csv file providing targets 
        - subset: dictionary specifying the dataset to be calibrated,
                  {'establishment_location_choice_model_coefficients': ['coefficient_name', ['paris_celcm', 'biotech_celcm']]}
          subset and subset_patterns cannot both be specified for the same dataset
        - subset_patterns: dictionary specifying the dataset to be calibrated through a regular expression (re) pattern
                  {'establishment_location_choice_model_coefficients': ['coefficient_name', '*_celcm']}
          subset and subset_patterns cannot both be specified for the same dataset

        """
        self.target_expression = target_expression
        self.target = self.read_target(target_file)

        self.run_manager = None
        self.xml_config = xml_config
        self.scenario = scenario
        self.skip_cache_cleanup = skip_cache_cleanup
        self.run_id, self.cache_directory = self.init_run()
        self.run_ids = [self.run_id]  # allow starting of multiple runs for parallel optimization
        self.log_directory = log_directory
        if self.log_directory is None:
            self.log_directory = self.cache_directory  # legacy

        log_file = os.path.join(self.log_directory, "calibration.log")
        logger.enable_file_logging(log_file)

        dict_config = XMLConfiguration(self.xml_config).get_run_configuration(self.scenario)
        ## get parameters from config
        self.base_year = dict_config["base_year"]
        self.start_year, self.end_year = dict_config["years"]
        self.project_name = dict_config["project_name"]
        self.package_order = dict_config["dataset_pool_configuration"].package_order
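A minimal usage sketch for the constructor above, assuming the enclosing class is named Calibration; the path, scenario name, and target expression below are illustrative placeholders modeled on the docstring, not values from the original code:

calib = Calibration(
    xml_config='project_configs/paris_zone.xml',       # XML project configuration (path is illustrative)
    scenario='paris_baseline',                          # scenario defining models_to_run and simulation years (assumed name)
    calib_datasets={'establishment_location_choice_model_coefficients': 'estimate'},
    target_expression='establishment.number_of_jobs',  # opus expression compared against the targets (assumed)
    target_file='targets.csv',                          # csv file providing target values
    subset={'establishment_location_choice_model_coefficients':
                ['coefficient_name', ['paris_celcm', 'biotech_celcm']]},
    log_directory=None,                                 # falls back to the run's cache directory when omitted
)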
Example no. 11
0
    def run_multiprocess(self, resources):
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is absolutely no good reason to be
        ###       changing the Configuration!
        resources['cache_directory'] = cache_directory

        log_file = os.path.join(cache_directory, 'run_multiprocess.log')
        logger.enable_file_logging(log_file)

        start_year = resources["years"][0]
        end_year = resources["years"][-1]
        nyears = end_year - start_year + 1
        root_seed = resources.get("seed", NO_SEED)
        if resources.get('_seed_dictionary_', None) is not None:
            # This is added by the RunManager to ensure reproducibility including restarted runs
            seed_dict = resources.get('_seed_dictionary_')
            seed_array = array(
                map(lambda year: seed_dict[year],
                    range(start_year, end_year + 1)))
        else:
            seed(root_seed)
            seed_array = randint(1, 2**30, nyears)
        logger.log_status("Running simulation for years %d thru %d" %
                          (start_year, end_year))
        logger.log_status("Simulation root seed: %s" % root_seed)

        for iyear, year in enumerate(range(start_year, end_year + 1)):
            success = self._run_each_year_as_separate_process(
                iyear,
                year,
                seed=seed_array[iyear],
                resources=resources,
                profiler_name=profiler_name,
                log_file=log_file)
            if not success:
                break

        self._notify_stopped()
        if profiler_name is not None:  # restore original value
            resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" %
                          (start_year, end_year))
Example no. 12
0
    def create_batch(self, indicators, source_data):
        self.source_data = source_data
        cache_directory = self.source_data.cache_directory

        self.storage_location = os.path.join(
            self.source_data.get_indicator_directory(), '_stored_data')

        if not os.path.exists(self.source_data.get_indicator_directory()):
            os.mkdir(self.source_data.get_indicator_directory())
        if not os.path.exists(self.storage_location):
            os.mkdir(self.storage_location)

        log_file_path = os.path.join(cache_directory, 'indicators',
                                     'indicators.log')
        logger.enable_file_logging(log_file_path, 'a')
        logger.log_status(
            '\n%s Indicator Generation BEGIN %s %s' %
            ('=' * 10, strftime('%Y_%m_%d_%H_%M', localtime()), '=' * 10))

        self.dataset = None
        self.dataset_state = {
            'current_cache_directory': None,
            'year': None,
            'dataset_name': None
        }

        self.package_order = self.source_data.get_package_order()

        SimulationState().set_cache_directory(cache_directory)

        self._check_integrity(indicators=indicators, source_data=source_data)

        computed_indicators = self._make_all_indicators(
            indicators=indicators, source_data=source_data)

        if not self.test:
            self.write_computed_indicators_to_db(
                computed_indicator_group=computed_indicators,
                project_name=self.project_name)

        logger.log_status(
            '%s Indicator Generation END %s %s\n' %
            ('=' * 11, strftime('%Y_%m_%d_%H_%M', localtime()), '=' * 11))
        logger.disable_file_logging(log_file_path)

        return computed_indicators
Example no. 13
0
    def create_batch(self, indicators, source_data):
        self.source_data = source_data
        cache_directory = self.source_data.cache_directory

        self.storage_location = os.path.join(self.source_data.get_indicator_directory(),
                                             '_stored_data')

        if not os.path.exists(self.source_data.get_indicator_directory()):
            os.mkdir(self.source_data.get_indicator_directory())
        if not os.path.exists(self.storage_location):
            os.mkdir(self.storage_location)

        log_file_path = os.path.join(cache_directory,
                                     'indicators',
                                     'indicators.log')
        logger.enable_file_logging(log_file_path, 'a')
        logger.log_status('\n%s Indicator Generation BEGIN %s %s'
            % ('='*10, strftime('%Y_%m_%d_%H_%M', localtime()), '='*10))

        self.dataset = None
        self.dataset_state = {
              'current_cache_directory':None,
              'year':None,
              'dataset_name':None
        }

        self.package_order = self.source_data.get_package_order()

        SimulationState().set_cache_directory(cache_directory)

        self._check_integrity(indicators = indicators,
                              source_data = source_data)

        computed_indicators = self._make_all_indicators(
            indicators = indicators,
            source_data = source_data)

        if not self.test:
            self.write_computed_indicators_to_db(computed_indicator_group = computed_indicators,
                                                 project_name = self.project_name)

        logger.log_status('%s Indicator Generation END %s %s\n'
            % ('='*11, strftime('%Y_%m_%d_%H_%M', localtime()), '='*11))
        logger.disable_file_logging(log_file_path)

        return computed_indicators
Example no. 14
0
    def _run_each_year_as_separate_process(
            self,
            start_year,
            end_year,
            seed_array,
            resources,
            log_file_name='run_multiprocess.log'):
        skip_first_year_of_urbansim = resources.get('skip_urbansim', False)
        log_file = os.path.join(resources['cache_directory'], log_file_name)
        profiler_name = resources.get("profile_filename", None)
        iyear = 0
        for year in range(start_year, end_year + 1):
            if (year != start_year) or ((year == start_year) and
                                        (not skip_first_year_of_urbansim)):
                logger.start_block(
                    'Running UrbanSim for year %d in new process' % year)
                try:
                    resources['years'] = (year, year)
                    resources['seed'] = (seed_array[iyear],)
                    logger.disable_file_logging(log_file)
                    if profiler_name is not None:
                        resources["profile_filename"] = "%s_%s" % (
                            profiler_name, year
                        )  # add year to the profile name
                    self._fork_new_process(
                        'urbansim.model_coordinators.model_system',
                        resources,
                        optional_args=['--log-file-name', log_file_name])
                    logger.enable_file_logging(log_file, verbose=False)
                finally:
                    logger.end_block()

            if ('travel_model_configuration' in resources) and (
                    not resources.get('skip_travel_model', False)):
                # tnicolai add start year to travel model config
                tmc = resources['travel_model_configuration']
                tmc['start_year'] = start_year  # end tnicolai
                self._run_travel_models_in_separate_processes(
                    resources['travel_model_configuration'], year, resources)

            if 'post_year_configuration' in resources:
                self._run_travel_models_in_separate_processes(
                    resources['post_year_configuration'], year, resources)
            iyear += 1
        self._notify_stopped()
Example no. 15
0
    def run_multiprocess(self, resources):
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is absolutely no good reason to be
        ###       changing the Configuration!
        resources['cache_directory'] = cache_directory

        log_file = os.path.join(cache_directory, 'run_multiprocess.log')
        logger.enable_file_logging(log_file)

        start_year = resources["years"][0]
        end_year = resources["years"][-1]
        nyears = end_year - start_year + 1
        root_seed = resources.get("seed", NO_SEED)
        if resources.get('_seed_dictionary_', None) is not None:
            # This is added by the RunManager to ensure reproducibility including restarted runs 
            seed_dict = resources.get('_seed_dictionary_')
            seed_array = array(map(lambda year : seed_dict[year], range(start_year, end_year+1)))
        else:
            seed(root_seed)
            seed_array = randint(1,2**30, nyears)
        logger.log_status("Running simulation for years %d thru %d" % (start_year, end_year))
        logger.log_status("Simulation root seed: %s" % root_seed)

        for iyear, year in enumerate(range(start_year, end_year+1)):
            success = self._run_each_year_as_separate_process(iyear, year, 
                                                                 seed=seed_array[iyear],
                                                                 resources=resources,
                                                                 profiler_name=profiler_name,
                                                                 log_file=log_file)
            if not success:
                break

        self._notify_stopped()
        if profiler_name is not None: # restore original value
            resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" % (start_year, end_year))
Example no. 16
0
    def create_batch(self, indicators, source_data):
        self.source_data = source_data
        cache_directory = self.source_data.cache_directory

        self.storage_location = os.path.join(self.source_data.get_indicator_directory(), "_stored_data")

        if not os.path.exists(self.source_data.get_indicator_directory()):
            os.mkdir(self.source_data.get_indicator_directory())
        if not os.path.exists(self.storage_location):
            os.mkdir(self.storage_location)

        log_file_path = os.path.join(cache_directory, "indicators", "indicators.log")
        logger.enable_file_logging(log_file_path, "a")
        logger.log_status(
            "\n%s Indicator Generation BEGIN %s %s" % ("=" * 10, strftime("%Y_%m_%d_%H_%M", localtime()), "=" * 10)
        )

        self.dataset = None
        self.dataset_state = {"current_cache_directory": None, "year": None, "dataset_name": None}

        self.package_order = self.source_data.get_package_order()

        SimulationState().set_cache_directory(cache_directory)

        self._check_integrity(indicators=indicators, source_data=source_data)

        computed_indicators = self._make_all_indicators(indicators=indicators, source_data=source_data)

        if not self.test:
            self.write_computed_indicators_to_db(
                computed_indicator_group=computed_indicators, project_name=self.project_name
            )

        logger.log_status(
            "%s Indicator Generation END %s %s\n" % ("=" * 11, strftime("%Y_%m_%d_%H_%M", localtime()), "=" * 11)
        )
        logger.disable_file_logging(log_file_path)

        return computed_indicators
Example no. 17
0
    def log_results(self):
        procedure = self.model_system.run_year_namespace["model"].procedure        
        if not hasattr(procedure, 'print_results'):
            logger.log_warning("Estimation procedure %s doesn't have a print_results() method, "  % procedure + \
                               "which is needed to log estimation results.")
            return

        tmp_config = Resources(self.config)
        outputvar = tmp_config['models_configuration'][self.model_name]['controller']['estimate']['arguments']['output']
        results = self.model_system.vardict.get(outputvar, "process_output")[1]
        
        storage_location = AttributeCache().get_storage_location()
        log_file_name = "estimate_models.log" ## one file for all estimation results
        logger.enable_file_logging( os.path.join(storage_location, log_file_name),
                                    mode = 'a')  ##appending instead of overwriting
        logger.start_block("%s Estimation Results" % self.model_name)        
        for submodel, submodel_results in results.items():
            logger.log_status( "Submodel %s" % submodel)
            if submodel_results == {}:
                logger.log_warning("No estimation results for submodel %s" % submodel)
            else:
                procedure.print_results(submodel_results)
        logger.end_block()
        logger.disable_file_logging()        
Example no. 18
0
    def estimate(self, spec_var=None, spec_py=None,
            submodel_string = "workers", 
            agent_sample_rate=0.005, alt_sample_size=None):
        """

        """
        CLOSE = 0.001
        sampler = "opus_core.samplers.weighted_sampler"
        if alt_sample_size is None:
            sampler = None
        
        date_time_str=strftime("%Y_%m_%d__%H_%M", localtime())
        agent_sample_rate_str = "__ASR_" + str(agent_sample_rate)
        alt_sample_size_str = "_ALT_" + str(alt_sample_size)
        info_file = date_time_str + agent_sample_rate_str + alt_sample_size_str + "__info.txt"
        logger.enable_file_logging(date_time_str + agent_sample_rate_str + alt_sample_size_str + "__run.txt")
        logger.enable_memory_logging()
        logger.log_status("Constrained Estimation with agent sample rate of %s and alternative sample size %s\n" % \
                          (agent_sample_rate, alt_sample_size))
                
        t1 = time()
        
        SimulationState().set_current_time(2000)

        self.nbs = SessionConfiguration().get_dataset_from_pool("neighborhood")
        self.hhs = SessionConfiguration().get_dataset_from_pool('household')

        depts, lambda_value = compute_lambda(self.nbs)
        supply, vacancy_rate = compute_supply_and_vacancy_rate(self.nbs, depts, lambda_value)
        self.nbs.set_values_of_one_attribute("supply", supply)
        dataset_pool = SessionConfiguration().get_dataset_pool()
        dataset_pool.add_datasets_if_not_included({'vacancy_rate': vacancy_rate,
                                                   'sample_rate':agent_sample_rate
                                                   })
        SessionConfiguration()["CLOSE"] = CLOSE
        SessionConfiguration()['info_file'] = info_file
        
        if self.save_estimation_results:
            out_storage = StorageFactory().build_storage_for_dataset(type='sql_storage', 
                storage_location=self.out_con)
        
        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification
        
        if spec_var is not None:
            self.specification = load_specification_from_dictionary(spec_var)
        else:
            in_storage = StorageFactory().build_storage_for_dataset(type='sql_storage', 
                storage_location=self.in_con)
            self.specification = EquationSpecification(in_storage=in_storage)
            self.specification.load(in_table_name="household_location_choice_model_specification")

        #submodel_string = "workers"
        
        seed(71) # was: seed(71,110)
        self.model_name = "household_location_choice_model"

        model = HouseholdLocationChoiceModelCreator().get_model(location_set=self.nbs, 
                                                                submodel_string=submodel_string,
                                                                sampler = sampler,
                                                                estimation_size_agents = agent_sample_rate * 100/20,    
                                                                # proportion of the agent set that should be used for the estimation,
                                                                # 
                                                                sample_size_locations = alt_sample_size,  # choice set size (includes current location)
                                                                compute_capacity_flag = True,
                                                                probabilities = "opus_core.mnl_probabilities",
                                                                choices = "urbansim.lottery_choices",
                                                                run_config = Resources({"capacity_string":"supply"}), 
                                                                estimate_config = Resources({"capacity_string":"supply","compute_capacity_flag":True}))

        #TODO: since households_for_estimation currently is the same as households, create_households_for_estimation
        #becomes unnecessary
        #agent_set, agents_index_for_estimation  =  create_households_for_estimation(self.hhs, self.in_con)
        agent_set = self.hhs; agents_index_for_estimation = arange(self.hhs.size())
        self.result = model.estimate(self.specification, 
                                     agent_set=agent_set, 
                                     agents_index=agents_index_for_estimation, 
                                     debuglevel=self.debuglevel,
                                     procedure="urbansim.constrain_estimation_bhhh_two_loops" ) #"urbansim.constrain_estimation_bhhh"

        #save estimation results
        if self.save_estimation_results:    
            self.save_results(out_storage)
            
        logger.log_status("Estimation done. " + str(time()-t1) + " s")
Example no. 19
0
    for irun in range(number_of_runs):
        config['seed'] = (seed_array[irun], )
        this_config = config.copy()
        if ((irun + 1) % number_of_runs_in_parallel) == 0:
            run_in_background = False
        else:
            run_in_background = True
        run_manager.setup_new_run(
            cache_directory=this_config['cache_directory'],
            configuration=this_config)
        run_manager.run_run(this_config,
                            run_as_multiprocess=False,
                            run_in_background=run_in_background)
        if irun == 0:
            # log file for the multiple runs will be located in the first cache
            first_cache_directory = this_config['cache_directory']
            log_file = os.path.join(first_cache_directory, 'multiple_runs.log')
            logger.enable_file_logging(log_file)
            logger.log_status("Multiple runs: %s replications" %
                              number_of_runs)
            logger.log_status("root random seed = %s" % str(root_seed))
        else:
            logger.enable_file_logging(log_file, verbose=False)

        logger.log_status("Run %s: %s" %
                          (irun + 1, this_config['cache_directory']))
        logger.disable_file_logging(log_file)
        list_of_cache_directories.append(this_config['cache_directory'])
        write_to_text_file(
            os.path.join(first_cache_directory, "cache_directories"),
            list_of_cache_directories)
    sampler = None
    if options.sampler is not None:
        sampler = "opus_core.samplers.%s" % options.sampler
    
    aggregate_lookup = {True:'aggregate demand', False:'aggregate choice set'}
    
    date_time_str=strftime("%Y_%m_%d__%H_%M", localtime())
    #agent_sample_rate_str = '' #"__ASR_" + str(agent_sample_rate)
    #alt_sample_size_str = "_ALT_" + str(alt_sample_size)
    logger.enable_memory_logging()

    if options.sampler == 'weighted_sampler':
        file_name_pattern = date_time_str + "_alt_size_%s_%s_%s" % ( alt_sample_size, options.sampler, 
                                                                    '_'.join(aggregate_lookup[options.aggregate_demand].split(' ')) )
        logger.enable_file_logging( file_name_pattern + '__run.txt')
        info_file = file_name_pattern + "__info.txt"
        logger.log_status("Constrained Estimation with alternative sample size %s from %s for %s" % \
                          (alt_sample_size, options.sampler, aggregate_lookup[options.aggregate_demand]))
    elif options.sampler == 'stratified_sampler':
        file_name_pattern = date_time_str + "_alt_size_%s_per_stratum_%s_%s" % ( options.sample_size_from_each_stratum, 
                                                                                options.sampler, 
                                                                                '_'.join(aggregate_lookup[options.aggregate_demand].split(' ')) )
        logger.enable_file_logging( file_name_pattern + '__run.txt')
        info_file = file_name_pattern + "__info.txt"
        logger.log_status("Constrained Estimation with alt sample size %s per stratum from %s for %s" % \
                          (options.sample_size_from_each_stratum, options.sampler, aggregate_lookup[options.aggregate_demand]))
    
    if options.sampler is None:
        file_name_pattern = date_time_str + "_all_choice_%s" % '_'.join(aggregate_lookup[options.aggregate_demand].split(' '))
        logger.enable_file_logging( file_name_pattern + '__run.txt')
Example no. 21
0
    def run(
        self,
        resources,
        write_datasets_to_cache_at_end_of_year=True,
        log_file_name="run_model_system.log",
        cleanup_datasets=True,
    ):
        """Entries in resources: (entries with no defaults are required)
               models - a list containing names of models to be run. Each name
                           must correspond to the name of the module/class of that model. Default(object): None
               years - a tuple (start year, end year)
               debuglevel - an integer. The higher the value, the more output is printed. Default: 0
               expression_library - a dictionary.  The keys in the dictionary are pairs (dataset_name, variable_name)
               and the values are the corresponding expressions.  The model system needs to set the expression library
               (if it isn't None) in DatasetFactory for DatasetFactory to know about variables defined as expressions
               in the xml expression library.  Default: None
        This method is called both to start up the simulation for all years, and also for each year
        when running with one process per year.  In the latter case, 'years' consists of just
        (current_year, current_year) rather than the real start and end years for the simulation.
        """
        if not isinstance(resources, Resources):
            raise TypeError, "Argument 'resources' must be of type 'Resources'."
        logger_settings = resources.get("log", {"tags": [], "verbosity_level": 3})
        logger.set_tags(logger_settings.get("tags", []))
        logger.set_verbosity_level(logger_settings.get("verbosity_level", 3))
        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        self.run_year_namespace = {}

        if resources.get("cache_directory", None) is not None:
            self.simulation_state.set_cache_directory(resources["cache_directory"])

        if "expression_library" in resources:
            VariableFactory().set_expression_library(resources["expression_library"])

        if resources.get("sample_input", False):
            self.update_config_for_multiple_runs(resources)

        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)

            with logger.block("Start simulation run"):
                models = resources.get("models", [])
                models_in_years = resources.get("models_in_year", {})

                resources.check_obligatory_keys(["years"])

                years = resources["years"]
                if (not isinstance(years, tuple)) and (not isinstance(years, list)):
                    raise TypeError, "Entry 'years' in resources must be a tuple."

                if len(years) < 2:
                    print years
                    raise StandardError, "Entry 'years' in resources must be of length at least 2."

                start_year = years[0]
                end_year = years[-1]

                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get("seed", NO_SEED)

                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)

                for year in range(start_year, end_year + 1):
                    with logger.block("Starting simulation for year " + str(year)):
                        self.simulation_state.set_current_time(year)
                        SessionConfiguration().get_dataset_pool().remove_all_datasets()
                        logger.disable_file_logging(log_file)
                        try:
                            if models_in_years.get(year, None) is not None:
                                models_to_run = models_in_years[year]
                            else:
                                models_to_run = models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets,
                            )
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        collect()

        finally:
            logger.disable_file_logging(log_file)
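The docstring above lists the entries that run() expects in resources. A minimal sketch of such a configuration, with illustrative model names, years, and paths; the Resources import below is the usual opus_core location, but treat it as an assumption:

from opus_core.resources import Resources  # assumed import location

# Illustrative configuration; model names, years, and cache path are placeholders.
resources = Resources({
    'models': ['real_estate_price_model', 'household_location_choice_model'],  # module/class names of the models to run
    'years': (2001, 2005),            # (start year, end year); per-year child processes pass (current_year, current_year)
    'debuglevel': 0,                  # higher values print more output
    'cache_directory': '/urbansim_cache/run_1',
    'seed': 1,
})
# model_system.run(resources) would then simulate years 2001 through 2005.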
Example no. 22
0
    def _run_year(
        self,
        year,
        models,
        simulation_state,
        debuglevel,
        resources,
        write_datasets_to_cache_at_end_of_year,
        cleanup_datasets=True,
    ):
        """
        Assumes that all datasets reside in the cache directory in binary format.
        """
        try:
            import wingdbstub
        except:
            pass
        self.vardict = {}
        log_file_name = os.path.join(simulation_state.get_cache_directory(), "year_%s_log.txt" % year)
        logger.enable_file_logging(log_file_name, "w")
        try:
            logger.start_block("Simulate year %s" % year)
            try:
                base_year = resources["base_year"]
                if year == base_year:
                    year_for_base_year_cache = year  # case of estimation
                else:
                    year_for_base_year_cache = year - 1
                cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
                self.vardict["cache_storage"] = cache_storage
                base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year)
                self.vardict["base_cache_storage"] = base_cache_storage
                simulation_state.set_flush_datasets(resources.get("flush_variables", False))
                SessionConfiguration()["simulation_year"] = year
                SessionConfiguration()["debuglevel"] = debuglevel
                datasets_to_preload_in_year = resources.get("datasets_to_preload_in_year", {})
                if datasets_to_preload_in_year.get(year, None) is not None:
                    datasets_to_preload = datasets_to_preload_in_year[year]
                else:
                    datasets_to_preload = resources.get("datasets_to_preload", {})
                for dataset_name in datasets_to_preload:
                    SessionConfiguration().get_dataset_from_pool(dataset_name)
                models_configuration = resources.get("models_configuration", {})
                dataset_pool = SessionConfiguration().get_dataset_pool()
                datasets = {}
                for dataset_name, its_dataset in dataset_pool.datasets_in_pool().iteritems():
                    self.vardict[dataset_name] = its_dataset
                    datasets[dataset_name] = its_dataset
                    exec "%s=its_dataset" % dataset_name

                # This is needed. It resides in locals()
                # and is passed on to models as they run.
                ### TODO: There has got to be a better way!
                model_resources = Resources(datasets)
                n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(
                    models, models_configuration
                )
                self.run_year_namespace = locals()
                # ==========
                # Run the models.
                # ==========
                model_number = -1
                for model_entry in models:
                    # list 'models' can be in the form:
                    # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                    #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                    #                                      'commercial']}},
                    #  {'model_name_3': ['estimate', 'run']},
                    #  'model_name_4',
                    #  {'model_name_5': {'group_members': 'all'}}
                    # ]
                    # get list of methods to be processed, possibly for each group member
                    if isinstance(model_entry, dict):
                        model_name, value = model_entry.items()[0]
                        if not isinstance(value, dict):  # is a model group
                            processes = value
                            if not isinstance(processes, list):
                                processes = [processes]
                    else:  # in the form 'model_name_4' in the comment above
                        model_name = model_entry
                        processes = ["run"]
                    group_member = None
                    model_group = model_group_members_to_run[model_name][1]
                    last_member = max(1, len(model_group_members_to_run[model_name][0].keys()))
                    for imember in range(last_member):
                        controller_config = models_configuration[model_name]["controller"]
                        model_configuration = models_configuration[model_name]
                        if model_group_members_to_run[model_name][0].keys():
                            group_member_name = model_group_members_to_run[model_name][0].keys()[imember]
                            group_member = ModelGroupMember(model_group, group_member_name)
                            processes = model_group_members_to_run[model_name][0][group_member_name]
                            member_model_name = "%s_%s" % (group_member_name, model_name)
                            if member_model_name in models_configuration.keys():
                                model_configuration = models_configuration[member_model_name]
                                if "controller" in model_configuration.keys():
                                    controller_config = model_configuration["controller"]
                        datasets_to_preload_for_this_model = controller_config.get(
                            "_model_structure_dependencies_", {}
                        ).get("dataset", [])
                        for dataset_name in datasets_to_preload_for_this_model:
                            try:
                                if not dataset_pool.has_dataset(dataset_name) or (dataset_name not in datasets.keys()):
                                    ds = dataset_pool.get_dataset(dataset_name)
                                    self.vardict[dataset_name] = ds
                                    datasets[dataset_name] = ds
                                    exec "%s=ds" % dataset_name
                            except:
                                logger.log_warning("Failed to load dataset %s." % dataset_name)
                        # import part
                        if "import" in controller_config.keys():
                            import_config = controller_config["import"]
                            for import_module in import_config.keys():
                                exec ("from %s import %s" % (import_module, import_config[import_module]))

                        # gui_import_replacements part
                        # This is a temporary hack -- replicates the functionality of the "import" section
                        # for use with the GUI.  This part of the config is a dictionary.
                        # Keys are names of models (not used here).  Values are 2 element pairs.
                        # The first element is a name and the second is a value.  Bind the name to the value.
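                        # For example (hypothetical values): {'my_model': ('my_alias', some_object)}
                        # would bind the local name 'my_alias' to some_object before the init part below.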
                        if "gui_import_replacements" in controller_config.keys():
                            import_replacement_config = controller_config["gui_import_replacements"]
                            for model_name in import_replacement_config.keys():
                                pair = import_replacement_config[model_name]
                                temp = pair[1]
                                exec ("%s = temp" % pair[0])

                        # init part
                        model = self.do_init(locals())

                        # estimate and/or run part
                        for process in processes:
                            model_number = model_number + 1
                            # write status file
                            model.set_model_system_status_parameters(
                                year, n_models, model_number, resources.get("status_file_for_gui", None)
                            )
                            model.write_status_for_gui()
                            # prepare part
                            exec (self.do_prepare(locals()))
                            processmodel_config = controller_config[process]
                            if "output" in processmodel_config.keys():
                                outputvar = processmodel_config["output"]
                            else:
                                outputvar = "process_output"
                            self.vardict[outputvar] = self.do_process(locals())
                            exec outputvar + "=self.vardict[outputvar]"

                            # check command file from gui, if the simulation should be stopped or paused
                            self.do_commands_from_gui(resources.get("command_file_for_gui", None))

                            # capture namespace for interactive estimation
                            self.run_year_namespace = locals()
                            self.flush_datasets_after_model(resources)
                            del model
                            collect()

                # Write all datasets to cache.
                if write_datasets_to_cache_at_end_of_year:
                    logger.start_block("Writing datasets to cache for year %s" % year)
                    try:
                        for dataset_name, its_dataset in (
                            SessionConfiguration().get_dataset_pool().datasets_in_pool().iteritems()
                        ):
                            self.flush_dataset(its_dataset)
                    finally:
                        logger.end_block()

            finally:
                logger.end_block()
        finally:
            logger.disable_file_logging(log_file_name)

        if cleanup_datasets:
            SessionConfiguration().delete_datasets()
    def estimate(self,
                 spec_var=None,
                 spec_py=None,
                 submodel_string="workers",
                 agent_sample_rate=0.005,
                 alt_sample_size=None):
        """

        """
        CLOSE = 0.001
        sampler = "opus_core.samplers.weighted_sampler"
        if alt_sample_size is None:
            sampler = None

        date_time_str = strftime("%Y_%m_%d__%H_%M", localtime())
        agent_sample_rate_str = "__ASR_" + str(agent_sample_rate)
        alt_sample_size_str = "_ALT_" + str(alt_sample_size)
        info_file = date_time_str + agent_sample_rate_str + alt_sample_size_str + "__info.txt"
        logger.enable_file_logging(date_time_str + agent_sample_rate_str +
                                   alt_sample_size_str + "__run.txt")
        logger.enable_memory_logging()
        logger.log_status("Constrained Estimation with agent sample rate of %s and alternative sample size %s\n" % \
                          (agent_sample_rate, alt_sample_size))

        t1 = time()

        SimulationState().set_current_time(2000)

        self.nbs = SessionConfiguration().get_dataset_from_pool("neighborhood")
        self.hhs = SessionConfiguration().get_dataset_from_pool('household')

        depts, lambda_value = compute_lambda(self.nbs)
        supply, vacancy_rate = compute_supply_and_vacancy_rate(
            self.nbs, depts, lambda_value)
        self.nbs.set_values_of_one_attribute("supply", supply)
        dataset_pool = SessionConfiguration().get_dataset_pool()
        dataset_pool.add_datasets_if_not_included({
            'vacancy_rate':
            vacancy_rate,
            'sample_rate':
            agent_sample_rate
        })
        SessionConfiguration()["CLOSE"] = CLOSE
        SessionConfiguration()['info_file'] = info_file

        if self.save_estimation_results:
            out_storage = StorageFactory().build_storage_for_dataset(
                type='sql_storage', storage_location=self.out_con)

        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification

        if spec_var is not None:
            self.specification = load_specification_from_dictionary(spec_var)
        else:
            in_storage = StorageFactory().build_storage_for_dataset(
                type='sql_storage', storage_location=self.in_con)
            self.specification = EquationSpecification(in_storage=in_storage)
            self.specification.load(
                in_table_name="household_location_choice_model_specification")

        #submodel_string = "workers"

        seed(71)  # was: seed(71,110)
        self.model_name = "household_location_choice_model"

        model = HouseholdLocationChoiceModelCreator().get_model(
            location_set=self.nbs,
            submodel_string=submodel_string,
            sampler=sampler,
            estimation_size_agents=agent_sample_rate * 100 / 20,
            # proportion of the agent set that should be used for the estimation,
            #
            sample_size_locations=
            alt_sample_size,  # choice set size (includes current location)
            compute_capacity_flag=True,
            probabilities="opus_core.mnl_probabilities",
            choices="urbansim.lottery_choices",
            run_config=Resources({"capacity_string": "supply"}),
            estimate_config=Resources({
                "capacity_string": "supply",
                "compute_capacity_flag": True
            }))

        #TODO: since households_for_estimation currently is the same as households, create_households_for_estimation
        #becomes unnecessary
        #agent_set, agents_index_for_estimation  =  create_households_for_estimation(self.hhs, self.in_con)
        agent_set = self.hhs
        agents_index_for_estimation = arange(self.hhs.size())
        self.result = model.estimate(
            self.specification,
            agent_set=agent_set,
            agents_index=agents_index_for_estimation,
            debuglevel=self.debuglevel,
            procedure="urbansim.constrain_estimation_bhhh_two_loops"
        )  #"urbansim.constrain_estimation_bhhh"

        #save estimation results
        if self.save_estimation_results:
            self.save_results(out_storage)

        logger.log_status("Estimation done. " + str(time() - t1) + " s")
Esempio n. 24
0
    def create_prediction_success_table(self, 
                                        summarize_by=None, 
                                        predicted_choice_id_name=None,
                                        predicted_choice_id_prefix="predicted_",
                                        log_to_file=None,
                                        force_predict=True):
        agents = self.get_agent_set()
        choices = self.get_choice_set()
        choice_id_name = choices.get_id_name()[0]
        
        if self.agents_index_for_prediction is not None:
            agents_index = self.agents_index_for_prediction
        else:
            agents_index = self.get_agent_set_index()

        if predicted_choice_id_name is None or len(predicted_choice_id_name) == 0:
            predicted_choice_id_name = predicted_choice_id_prefix + choice_id_name
            
        if force_predict or (predicted_choice_id_name not in agents.get_known_attribute_names()):
            if not self.predict(predicted_choice_id_name=predicted_choice_id_name,
                                agents_index=agents_index
                                ):
                logger.log_error("Failed to run simulation for prediction; unable to create prediction success table.")
                return

        if log_to_file is not None and len(log_to_file) > 0:
            logger.enable_file_logging(log_to_file)
            
        ## by default, compare predicted choice with observed choice
        ## this is not feasible for location choice model, where the 
        ## alternative set is too large to be useful
        if summarize_by is None:
            summarize_by = "%s.%s" % (agents.dataset_name, choice_id_name)
            
        summarize_dataset_name = VariableName(summarize_by).get_dataset_name()
        if summarize_dataset_name == choices.dataset_name:
            summary_id = choices.compute_variables(summarize_by)
            
            chosen_choice_id = agents.get_attribute_by_index(choices.get_id_name()[0], agents_index)
            predicted_choice_id = agents.get_attribute_by_index(predicted_choice_id_name, agents_index)
            chosen_choice_index = choices.get_id_index(chosen_choice_id)
            predicted_choice_index = choices.get_id_index(predicted_choice_id)
            
            chosen_summary_id = summary_id[chosen_choice_index]
            predicted_summary_id = summary_id[predicted_choice_index]
    
            unique_summary_id = unique(summary_id)
        elif summarize_dataset_name == agents.dataset_name:
            chosen_summary_id = agents.compute_variables(summarize_by)[agents_index]
            
            chosen_choice_id = agents.get_attribute(choice_id_name).copy()
            predicted_choice_id = agents.get_attribute(predicted_choice_id_name)
            agents.modify_attribute(name=choice_id_name, data=predicted_choice_id)
            predicted_summary_id = agents.compute_variables(summarize_by)[agents_index]
            
            agents.modify_attribute(name=choice_id_name, data=chosen_choice_id)
    
            unique_summary_id = unique( concatenate((chosen_summary_id, predicted_summary_id)) )
        else:
            logger.log_error("summarize_by expression '%s' is specified for dataset %s, which is neither the choice_set '%s' nor the agent_set '%s'." 
                             % (summarize_by, summarize_dataset_name, choices.dataset_name, agents.dataset_name))
            return False
            
        # observed on row, predicted on column
        prediction_matrix = zeros( (unique_summary_id.size, unique_summary_id.size), dtype="int32" )

        def _convert_array_to_tab_delimited_string(an_array):
            from numpy import dtype
            if an_array.dtype == dtype('f'):
                return "\t".join(["%5.4f" % item for item in an_array])
            return "\t".join([str(item) for item in an_array])
        
        logger.log_status("Observed_id\tSuccess_rate\t%s" % \
                          _convert_array_to_tab_delimited_string(unique_summary_id) )
        i = 0
        success_rate = zeros( unique_summary_id.size, dtype="float32" )
        for observed_id in unique_summary_id:
            predicted_id = predicted_summary_id[chosen_summary_id==observed_id]
            prediction_matrix[i] = ndimage.sum(ones(predicted_id.size), labels=predicted_id, index=unique_summary_id )
            if prediction_matrix[i].sum() > 0:
                success_rate[i] = float(prediction_matrix[i, i]) / prediction_matrix[i].sum()
            else:
                success_rate[i] = 0
            logger.log_status("%s\t\t%5.4f\t\t%s" % (observed_id, success_rate[i], 
                                              _convert_array_to_tab_delimited_string(prediction_matrix[i]) ) )
            i+=1

        success_rate2 = zeros( i, dtype="float32" )
        for j in range(i):
            if prediction_matrix[j, :].sum() > 0:
                success_rate2[j]=float(prediction_matrix[:,j].sum()) / prediction_matrix[j, :].sum()
            else:
                success_rate2[j]=0
        logger.log_status("%s\t\t%s\t\t%s" % (' ', ' ', 
                                                 _convert_array_to_tab_delimited_string( success_rate2 ) ))
        logger.disable_file_logging(filename=log_to_file)
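
# A minimal standalone sketch (hypothetical values, not part of the model code) of the
# scipy.ndimage.sum trick used above to fill one row of the prediction-success matrix:
# summing an array of ones with 'labels' and 'index' just counts how often each summary
# id occurs among the predictions made for one observed category.
from numpy import array, ones
from scipy import ndimage

unique_summary_id = array([1, 2, 3, 4, 5])   # all summary categories (matrix rows/columns)
predicted_id = array([2, 3, 2, 2, 5])        # predicted ids of agents observed in one category
row = ndimage.sum(ones(predicted_id.size), labels=predicted_id, index=unique_summary_id)
# row == [0., 3., 1., 0., 1.]; the diagonal entry divided by the row total is the success rate
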
    root_seed = config.get("seed", None)
    seed(root_seed)
    # generate different seed for each run (each seed contains 1 number)
    seed_array = randint(1,2**30, number_of_runs)
    list_of_cache_directories = []
    for irun in range(number_of_runs):
        config['seed']= (seed_array[irun],)
        this_config = config.copy()
        if ((irun + 1) % number_of_runs_in_parallel) == 0:
            run_in_background = False
        else:
            run_in_background = True
        run_manager.setup_new_run(cache_directory = this_config['cache_directory'],
                                  configuration = this_config)
        run_manager.run_run(this_config, run_as_multiprocess=False,
                            run_in_background=run_in_background)
        if irun == 0:
            # log file for the multiple runs will be located in the first cache
            first_cache_directory = this_config['cache_directory']
            log_file = os.path.join(first_cache_directory, 'multiple_runs.log')
            logger.enable_file_logging(log_file)
            logger.log_status("Multiple runs: %s replications" % number_of_runs)
            logger.log_status("root random seed = %s" % str(root_seed))
        else:
            logger.enable_file_logging(log_file, verbose=False)

        logger.log_status("Run %s: %s" % (irun+1, this_config['cache_directory']))
        logger.disable_file_logging(log_file)
        list_of_cache_directories.append(this_config['cache_directory'])
        write_to_text_file(os.path.join(first_cache_directory,"cache_directories"),
                                list_of_cache_directories)
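
# A minimal sketch (hypothetical root seed) of the seeding pattern used above: one root
# seed deterministically generates a sub-seed per replication, so the whole batch of runs
# can be reproduced from a single number.
from numpy.random import randint, seed

seed(42)                            # root seed, e.g. config.get("seed")
seed_array = randint(1, 2**30, 3)   # one sub-seed per run (here: 3 replications)
# each run then receives config['seed'] = (seed_array[irun],)
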
Esempio n. 26
0
    def create_prediction_success_table(
            self,
            summarize_by=None,
            predicted_choice_id_name=None,
            predicted_choice_id_prefix="predicted_",
            log_to_file=None,
            force_predict=True):
        agents = self.get_agent_set()
        choices = self.get_choice_set()
        choice_id_name = choices.get_id_name()[0]

        if self.agents_index_for_prediction is not None:
            agents_index = self.agents_index_for_prediction
        else:
            agents_index = self.get_agent_set_index()

        if predicted_choice_id_name is None or len(
                predicted_choice_id_name) == 0:
            predicted_choice_id_name = predicted_choice_id_prefix + choice_id_name

        if force_predict or (predicted_choice_id_name
                             not in agents.get_known_attribute_names()):
            if not self.predict(
                    predicted_choice_id_name=predicted_choice_id_name,
                    agents_index=agents_index):
                logger.log_error(
                    "Failed to run simulation for prediction; unable to create prediction success table."
                )
                return

        if log_to_file is not None and len(log_to_file) > 0:
            logger.enable_file_logging(log_to_file)

        ## by default, compare predicted choice with observed choice
        ## this is not feasible for location choice model, where the
        ## alternative set is too large to be useful
        if summarize_by is None:
            summarize_by = "%s.%s" % (agents.dataset_name, choice_id_name)

        summarize_dataset_name = VariableName(summarize_by).get_dataset_name()
        if summarize_dataset_name == choices.dataset_name:
            summary_id = choices.compute_variables(summarize_by)

            chosen_choice_id = agents.get_attribute_by_index(
                choices.get_id_name()[0], agents_index)
            predicted_choice_id = agents.get_attribute_by_index(
                predicted_choice_id_name, agents_index)
            chosen_choice_index = choices.get_id_index(chosen_choice_id)
            predicted_choice_index = choices.get_id_index(predicted_choice_id)

            chosen_summary_id = summary_id[chosen_choice_index]
            predicted_summary_id = summary_id[predicted_choice_index]

            unique_summary_id = unique(summary_id)
        elif summarize_dataset_name == agents.dataset_name:
            chosen_summary_id = agents.compute_variables(
                summarize_by)[agents_index]

            chosen_choice_id = agents.get_attribute(choice_id_name).copy()
            predicted_choice_id = agents.get_attribute(
                predicted_choice_id_name)
            agents.modify_attribute(name=choice_id_name,
                                    data=predicted_choice_id)
            predicted_summary_id = agents.compute_variables(
                summarize_by)[agents_index]

            agents.modify_attribute(name=choice_id_name, data=chosen_choice_id)

            unique_summary_id = unique(
                concatenate((chosen_summary_id, predicted_summary_id)))
        else:
            logger.log_error(
                "summarize_by expression '%s' is specified for dataset %s, which is neither the choice_set '%s' nor the agent_set '%s'."
                % (summarize_by, summarize_dataset_name, choices.dataset_name,
                   agents.dataset_name))
            return False

        # observed on row, predicted on column
        prediction_matrix = zeros(
            (unique_summary_id.size, unique_summary_id.size), dtype="int32")

        def _convert_array_to_tab_delimited_string(an_array):
            from numpy import dtype
            if an_array.dtype == dtype('f'):
                return "\t".join(["%5.4f" % item for item in an_array])
            return "\t".join([str(item) for item in an_array])

        logger.log_status("Observed_id\tSuccess_rate\t%s" % \
                          _convert_array_to_tab_delimited_string(unique_summary_id) )
        i = 0
        success_rate = zeros(unique_summary_id.size, dtype="float32")
        for observed_id in unique_summary_id:
            predicted_id = predicted_summary_id[chosen_summary_id ==
                                                observed_id]
            prediction_matrix[i] = ndimage.sum(ones(predicted_id.size),
                                               labels=predicted_id,
                                               index=unique_summary_id)
            if prediction_matrix[i].sum() > 0:
                success_rate[i] = float(
                    prediction_matrix[i, i]) / prediction_matrix[i].sum()
            else:
                success_rate[i] = 0
            logger.log_status(
                "%s\t\t%5.4f\t\t%s" %
                (observed_id, success_rate[i],
                 _convert_array_to_tab_delimited_string(prediction_matrix[i])))
            i += 1

        success_rate2 = zeros(i, dtype="float32")
        for j in range(i):
            if prediction_matrix[j, :].sum() > 0:
                success_rate2[j] = float(prediction_matrix[:, j].sum()
                                         ) / prediction_matrix[j, :].sum()
            else:
                success_rate2[j] = 0
        logger.log_status(
            "%s\t\t%s\t\t%s" %
            (' ', ' ', _convert_array_to_tab_delimited_string(success_rate2)))
        logger.disable_file_logging(filename=log_to_file)
Esempio n. 27
0
    def _run_year(self,
                  year,
                  models,
                  simulation_state,
                  debuglevel,
                  resources,
                  write_datasets_to_cache_at_end_of_year,
                  cleanup_datasets=True):
        """
        Assumes that all datasets reside in the cache directory in binary format.
        """
        try:
            import wingdbstub
        except:
            pass
        self.vardict = {}
        log_file_name = os.path.join(simulation_state.get_cache_directory(),
                                     "year_%s_log.txt" % year)
        logger.enable_file_logging(log_file_name, 'w')
        try:
            logger.start_block('Simulate year %s' % year)
            try:
                base_year = resources['base_year']
                if year == base_year:
                    year_for_base_year_cache = year  # case of estimation
                else:
                    year_for_base_year_cache = year - 1
                cache_storage = AttributeCache().get_flt_storage_for_year(
                    year_for_base_year_cache)
                self.vardict['cache_storage'] = cache_storage
                base_cache_storage = AttributeCache().get_flt_storage_for_year(
                    base_year)
                self.vardict['base_cache_storage'] = base_cache_storage
                simulation_state.set_flush_datasets(
                    resources.get("flush_variables", False))
                SessionConfiguration()["simulation_year"] = year
                SessionConfiguration()["debuglevel"] = debuglevel
                datasets_to_preload_in_year = resources.get(
                    'datasets_to_preload_in_year', {})
                if datasets_to_preload_in_year.get(year, None) is not None:
                    datasets_to_preload = datasets_to_preload_in_year[year]
                else:
                    datasets_to_preload = resources.get(
                        'datasets_to_preload', {})
                for dataset_name in datasets_to_preload:
                    SessionConfiguration().get_dataset_from_pool(dataset_name)
                models_configuration = resources.get('models_configuration',
                                                     {})
                dataset_pool = SessionConfiguration().get_dataset_pool()
                datasets = {}
                for dataset_name, its_dataset in dataset_pool.datasets_in_pool(
                ).iteritems():
                    self.vardict[dataset_name] = its_dataset
                    datasets[dataset_name] = its_dataset
                    exec '%s=its_dataset' % dataset_name

                # This is needed. It resides in locals()
                # and is passed on to models as they run.
                ### TODO: There has got to be a better way!
                model_resources = Resources(datasets)
                n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(
                    models, models_configuration)
                self.run_year_namespace = locals()
                #==========
                # Run the models.
                #==========
                model_number = -1
                for model_entry in models:
                    # list 'models' can be in the form:
                    # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                    #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                    #                                      'commercial']}},
                    #  {'model_name_3': ['estimate', 'run']},
                    #  'model_name_4',
                    #  {'model_name_5': {'group_members': 'all'}}
                    # ]
                    # get the list of methods to be processed, possibly per group member
                    if isinstance(model_entry, dict):
                        model_name, value = model_entry.items()[0]
                        if not isinstance(value, dict):  # is a model group
                            processes = value
                            if not isinstance(processes, list):
                                processes = [processes]
                    else:  # in the form 'model_name_4' in the comment above
                        model_name = model_entry
                        processes = ["run"]
                    group_member = None
                    model_group = model_group_members_to_run[model_name][1]
                    last_member = max(
                        1,
                        len(model_group_members_to_run[model_name][0].keys()))
                    for imember in range(last_member):
                        controller_config = models_configuration[model_name][
                            "controller"]
                        model_configuration = models_configuration[model_name]
                        if model_group_members_to_run[model_name][0].keys():
                            group_member_name = model_group_members_to_run[
                                model_name][0].keys()[imember]
                            group_member = ModelGroupMember(
                                model_group, group_member_name)
                            processes = model_group_members_to_run[model_name][
                                0][group_member_name]
                            member_model_name = "%s_%s" % (group_member_name,
                                                           model_name)
                            if member_model_name in models_configuration.keys(
                            ):
                                model_configuration = models_configuration[
                                    member_model_name]
                                if "controller" in model_configuration.keys():
                                    controller_config = model_configuration[
                                        "controller"]
                        datasets_to_preload_for_this_model = controller_config.get(
                            '_model_structure_dependencies_',
                            {}).get('dataset', [])
                        for dataset_name in datasets_to_preload_for_this_model:
                            try:
                                if not dataset_pool.has_dataset(
                                        dataset_name) or (
                                            dataset_name
                                            not in datasets.keys()):
                                    ds = dataset_pool.get_dataset(dataset_name)
                                    self.vardict[dataset_name] = ds
                                    datasets[dataset_name] = ds
                                    exec '%s=ds' % dataset_name
                            except:
                                logger.log_warning(
                                    'Failed to load dataset %s.' %
                                    dataset_name)
                        # import part
                        if "import" in controller_config.keys():
                            import_config = controller_config["import"]
                            for import_module in import_config.keys():
                                exec("from %s import %s" %
                                     (import_module,
                                      import_config[import_module]))

                        # gui_import_replacements part
                        # This is a temporary hack -- replicates the functionality of the "import" section
                        # for use with the GUI.  This part of the config is a dictionary.
                        # Keys are names of models (not used here).  Values are two-element pairs:
                        # the first element is a name and the second is a value; bind the name to the value.
                        if "gui_import_replacements" in controller_config.keys(
                        ):
                            import_replacement_config = controller_config[
                                "gui_import_replacements"]
                            for model_name in import_replacement_config.keys():
                                pair = import_replacement_config[model_name]
                                temp = pair[1]
                                exec("%s = temp") % pair[0]

                        # init part
                        model = self.do_init(locals())

                        # estimate and/or run part
                        for process in processes:
                            model_number = model_number + 1
                            # write status file
                            model.set_model_system_status_parameters(
                                year, n_models, model_number,
                                resources.get('status_file_for_gui', None))
                            model.write_status_for_gui()
                            # prepare part
                            exec(self.do_prepare(locals()))
                            processmodel_config = controller_config[process]
                            if "output" in processmodel_config.keys():
                                outputvar = processmodel_config["output"]
                            else:
                                outputvar = "process_output"
                            self.vardict[outputvar] = self.do_process(locals())
                            exec outputvar + '=self.vardict[outputvar]'

                            # check command file from gui, if the simulation should be stopped or paused
                            self.do_commands_from_gui(
                                resources.get('command_file_for_gui', None))

                            # capture namespace for interactive estimation
                            self.run_year_namespace = locals()
                            self.flush_datasets_after_model(resources)
                            del model
                            collect()

                # Write all datasets to cache.
                if write_datasets_to_cache_at_end_of_year:
                    logger.start_block(
                        'Writing datasets to cache for year %s' % year)
                    try:
                        for dataset_name, its_dataset in SessionConfiguration(
                        ).get_dataset_pool().datasets_in_pool().iteritems():
                            self.flush_dataset(its_dataset)
                    finally:
                        logger.end_block()

            finally:
                logger.end_block()
        finally:
            logger.disable_file_logging(log_file_name)

        if cleanup_datasets:
            SessionConfiguration().delete_datasets()
Esempio n. 28
0
    def run(self,
            resources,
            write_datasets_to_cache_at_end_of_year=True,
            log_file_name='run_model_system.log',
            cleanup_datasets=True):
        """Entries in resources: (entries with no defaults are required)
               models - a list containing names of models to be run. Each name
                           must correspond to the name of the module/class of that model. Default: None
               years - a tuple (start year, end year)
               debuglevel - an integer. The higher the more output will be printed. Default: 0
               expression_library - a dictionary.  The keys in the dictionary are pairs (dataset_name, variable_name)
               and the values are the corresponding expressions.  The model system needs to set the expression library
               (if it isn't None) in DatasetFactory for DatasetFactory to know about variables defined as expressions
               in the xml expression library.  Default: None
        This method is called both to start up the simulation for all years and for each year
        when running with one process per year.  In the latter case, 'years' consists of just
        (current_year, current_year) rather than the real start and end years for the simulation.
        """
        if not isinstance(resources, Resources):
            raise TypeError, "Argument 'resources' must be of type 'Resources'."
        logger_settings = resources.get("log", {
            "tags": [],
            "verbosity_level": 3
        })
        logger.set_tags(logger_settings.get("tags", []))
        logger.set_verbosity_level(logger_settings.get("verbosity_level", 3))
        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(
            resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        self.run_year_namespace = {}

        if resources.get('cache_directory', None) is not None:
            self.simulation_state.set_cache_directory(
                resources['cache_directory'])

        if 'expression_library' in resources:
            VariableFactory().set_expression_library(
                resources['expression_library'])

        if resources.get('sample_input', False):
            self.update_config_for_multiple_runs(resources)

        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)

            with logger.block('Start simulation run'):
                models = resources.get("models", [])
                models_in_years = resources.get("models_in_year", {})

                resources.check_obligatory_keys(["years"])

                years = resources["years"]
                if (not isinstance(years, tuple)) and (not isinstance(
                        years, list)):
                    raise TypeError, "Entry 'years' in resources must be a tuple."

                if len(years) < 2:
                    print years
                    raise StandardError, "Entry 'years' in resources must be of length at least 2."

                start_year = years[0]
                end_year = years[-1]

                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get('seed', NO_SEED)

                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)

                for year in range(start_year, end_year + 1):
                    with logger.block("Starting simulation for year " +
                                      str(year)):
                        self.simulation_state.set_current_time(year)
                        SessionConfiguration().get_dataset_pool(
                        ).remove_all_datasets()
                        logger.disable_file_logging(log_file)
                        try:
                            if models_in_years.get(year, None) is not None:
                                models_to_run = models_in_years[year]
                            else:
                                models_to_run = models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=
                                write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets)
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        collect()

        finally:
            logger.disable_file_logging(log_file)
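
# A minimal, hypothetical sketch of the 'resources' argument expected by run() above.
# Entry names follow the docstring; real configurations carry many more entries, and the
# import path for Resources is assumed here to be opus_core.resources.
from opus_core.resources import Resources

resources = Resources({
    'models': ['household_location_choice_model'],   # hypothetical model list
    'years': (2001, 2005),                           # (start year, end year)
    'debuglevel': 0,
    'base_year': 2000,
    'cache_directory': '/tmp/urbansim_cache',        # hypothetical path
})
# model_system.run(resources)   # where model_system is an instance of the class defining run()
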
    aggregate_lookup = {
        True: 'aggregate demand',
        False: 'aggregate choice set'
    }

    date_time_str = strftime("%Y_%m_%d__%H_%M", localtime())
    #agent_sample_rate_str = '' #"__ASR_" + str(agent_sample_rate)
    #alt_sample_size_str = "_ALT_" + str(alt_sample_size)
    logger.enable_memory_logging()

    if options.sampler == 'weighted_sampler':
        file_name_pattern = date_time_str + "_alt_size_%s_%s_%s" % (
            alt_sample_size, options.sampler, '_'.join(
                aggregate_lookup[options.aggregate_demand].split(' ')))
        logger.enable_file_logging(file_name_pattern + '__run.txt')
        info_file = file_name_pattern + "__info.txt"
        logger.log_status("Constrained Estimation with alternatvie sample size %s from %s for %s" % \
                          (alt_sample_size, options.sampler, aggregate_lookup[options.aggregate_demand]))
    elif options.sampler == 'stratified_sampler':
        file_name_pattern = date_time_str + "_alt_size_%s_per_stratum_%s_%s" % (
            options.sample_size_from_each_stratum, options.sampler, '_'.join(
                aggregate_lookup[options.aggregate_demand].split(' ')))
        logger.enable_file_logging(file_name_pattern + '__run.txt')
        info_file = file_name_pattern + "__info.txt"
        logger.log_status("Constrained Estimation with alt sample size %s per stratum from %s for %s" % \
                          (options.sample_size_from_each_stratum, options.sampler, aggregate_lookup[options.aggregate_demand]))
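
    # Worked example (hypothetical values): with options.sampler == 'weighted_sampler',
    # alt_sample_size == 30 and aggregate demand enabled, file_name_pattern above becomes
    # something like '2009_01_15__10_30_alt_size_30_weighted_sampler_aggregate_demand',
    # and the run log goes to that name plus '__run.txt'.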

    if options.sampler is None:
        file_name_pattern = date_time_str + "_all_choice_%s" % '_'.join(
            aggregate_lookup[options.aggregate_demand].split(' '))