Example no. 1
    def __init__(self, config):
        ss = SimulationState(new_instance=True)
        ss.set_current_time(config['base_year'])
        ss.set_cache_directory(config['cache_directory'])

        SessionConfiguration(new_instance=True,
                             package_order=config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        #if not os.path.exists(config['cache_directory']):  ## if cache exists, it will automatically skip
        cacher = CreateBaseyearCache()
        cache_dir = cacher.run(config)

        if 'estimation_database_configuration' in config:
            db_server = DatabaseServer(config['estimation_database_configuration'])
            db = db_server.get_database(config['estimation_database_configuration'].database_name)
            out_storage = StorageFactory().get_storage(
                'sql_storage', 
                storage_location = db)
        else:
            output_cache = os.path.join(config['cache_directory'], str(config['base_year']+1))
            out_storage = StorageFactory().get_storage('flt_storage', storage_location=output_cache)

        dataset_pool = SessionConfiguration().get_dataset_pool()
        households = dataset_pool.get_dataset("household")
        buildings = dataset_pool.get_dataset("building")
        zones = dataset_pool.get_dataset("zone")
        zone_ids = zones.get_id_attribute()
        capacity_attribute_name = "residential_units"  #_of_use_id_%s" % id
        capacity_variable_name = "%s=sanfrancisco.zone.aggregate_%s_from_building" % \
                                 (capacity_attribute_name, capacity_attribute_name)
        buildings.compute_variables("sanfrancisco.building.zone_id", dataset_pool=dataset_pool)
        zones.compute_variables(capacity_variable_name, dataset_pool=dataset_pool)

        building_zone_id = buildings.get_attribute('zone_id')
        
#        is_household_unplace = datasets['household'].get_attribute("building_id") <= 0
        is_household_unplaced = 1 #all households are unplaced
        household_building_id = zeros(households.size(), dtype='int32')-1 #datasets['household'].get_attribute("building_id")
        
        for zone_id in zone_ids:
            capacity = zones.get_attribute_by_id(capacity_attribute_name, zone_id)
            is_household_in_this_zone = (households.get_attribute('zone_id') == zone_id)
            is_unplaced_household_in_this_zone = is_household_in_this_zone * is_household_unplaced
            is_building_in_this_zone = (building_zone_id == zone_id)
#            if not is_household_in_this_zone.sum() <= capacity:
            if capacity == 0 or is_household_in_this_zone.sum()==0:
                print "WARNING: zone %s has %s households but only %s units" % (zone_id, is_household_in_this_zone.sum(), capacity)
                continue
                        
            prob = buildings.get_attribute(capacity_attribute_name) * is_building_in_this_zone / array(capacity, dtype=float64)

            r = random(sum(is_unplaced_household_in_this_zone))
            prob_cumsum = ncumsum(prob)
            index_to_bldg = searchsorted(prob_cumsum, r)

            household_building_id[where(is_unplaced_household_in_this_zone)] = buildings.get_attribute_by_index('building_id', index_to_bldg)

#        import pdb;pdb.set_trace()
        households.set_values_of_one_attribute('building_id', household_building_id)
        households.write_dataset(out_table_name='households', out_storage=out_storage)
    def run(self, config, year, *args, **kwargs):
        """This is the main entry point.  It gets the appropriate configuration info from the 
        travel_model_configuration part of this config, and then copies the specified 
        UrbanSim data into files for the travel model to read.  
        """
        cache_directory = config['cache_directory']
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(cache_directory)
        simulation_state.set_current_time(year)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(new_instance=True,
                                            package_order=config['dataset_pool_configuration'].package_order,
                                            in_storage=attribute_cache).get_dataset_pool()

        #cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
        #datasets = DatasetFactory().create_datasets_from_flt(config.get('datasets_to_preload',{}),
                                                            #"urbansim",
                                                            #additional_arguments={'in_storage': attribute_cache})
        zone_set = dataset_pool.get_dataset('travel_zone')
        self.prepare_for_run(config['travel_model_configuration'], year)
        self.create_travel_model_input_file(config=config, 
                                            year=year, 
                                            zone_set=zone_set, 
                                            datasets=dataset_pool,
                                            *args, **kwargs)
    def __init__(self, config):
        ss = SimulationState(new_instance=True)
        ss.set_current_time(config['base_year'])
        ss.set_cache_directory(config['cache_directory'])

        SessionConfiguration(new_instance=True,
                             package_order=config['dataset_pool_configuration'].package_order,
                             in_storage=AttributeCache())
        #if not os.path.exists(config['cache_directory']):  ## if cache exists, it will automatically skip
        cacher = CreateBaseyearCache()
        cache_dir = cacher.run(config)

        if 'estimation_database_configuration' in config:
            db_server = DatabaseServer(config['estimation_database_configuration'])
            db = db_server.get_database(config['estimation_database_configuration'].database_name)
            out_storage = StorageFactory().get_storage(
                'sql_storage', 
                storage_location = db)
        else:
            output_cache = os.path.join(config['cache_directory'], str(config['base_year']+1))
            out_storage = StorageFactory().get_storage('flt_storage', storage_location=output_cache)

        dataset_pool = SessionConfiguration().get_dataset_pool()
        households = dataset_pool.get_dataset("household")
        buildings = dataset_pool.get_dataset("building")
        zones = dataset_pool.get_dataset("zone")
        zone_ids = zones.get_id_attribute()
        capacity_attribute_name = "residential_units"  #_of_use_id_%s" % id
        capacity_variable_name = "%s=sanfrancisco.zone.aggregate_%s_from_building" % \
                                 (capacity_attribute_name, capacity_attribute_name)
        buildings.compute_variables("sanfrancisco.building.zone_id", dataset_pool=dataset_pool)
        zones.compute_variables(capacity_variable_name, dataset_pool=dataset_pool)

        building_zone_id = buildings.get_attribute('zone_id')
        
#        is_household_unplace = datasets['household'].get_attribute("building_id") <= 0
        is_household_unplaced = 1 #all households are unplaced
        household_building_id = zeros(households.size(), dtype='int32')-1 #datasets['household'].get_attribute("building_id")
        
        for zone_id in zone_ids:
            capacity = zones.get_attribute_by_id(capacity_attribute_name, zone_id)
            is_household_in_this_zone = (households.get_attribute('zone_id') == zone_id)
            is_unplaced_household_in_this_zone = is_household_in_this_zone * is_household_unplaced
            is_building_in_this_zone = (building_zone_id == zone_id)
#            if not is_household_in_this_zone.sum() <= capacity:
            if capacity == 0 or is_household_in_this_zone.sum()==0:
                print "WARNING: zone %s has %s households but only %s units" % (zone_id, is_household_in_this_zone.sum(), capacity)
                continue
                        
            prob = buildings.get_attribute(capacity_attribute_name) * is_building_in_this_zone / array(capacity, dtype=float64)

            r = random(sum(is_unplaced_household_in_this_zone))
            prob_cumsum = ncumsum(prob)
            index_to_bldg = searchsorted(prob_cumsum, r)

            household_building_id[where(is_unplaced_household_in_this_zone)] = buildings.get_attribute_by_index('building_id', index_to_bldg)

#        import pdb;pdb.set_trace()
        households.set_values_of_one_attribute('building_id', household_building_id)
        households.write_dataset(out_table_name='households', out_storage=out_storage)
    def run(self, config, show_output = False):
        logger.log_status("Caching large SQL tables to: " + config['cache_directory'])
        self.show_output = show_output
        
        #import pydevd;pydevd.settrace()
        
        server_configuration = config['scenario_database_configuration']
        
        scenario_database_manager = ScenarioDatabaseManager(
            server_configuration = server_configuration, 
            base_scenario_database_name = server_configuration.database_name                                                         
        )
        
        self.database_server = DatabaseServer(server_configuration)
        
        database_to_table_mapping = scenario_database_manager.get_database_to_table_mapping()
        
        self.tables_to_cache = config['creating_baseyear_cache_configuration'].tables_to_cache
                
        simulation_state = SimulationState()
        if 'low_memory_run' in config:
            simulation_state.set_low_memory_run(config['low_memory_run'])
        simulation_state.set_cache_directory(config['cache_directory'])
        simulation_state.set_current_time(config['base_year'])
                  
        self.tables_cached = set()      
        for database_name, tables in database_to_table_mapping.items():
            self.cache_database_tables(config, database_name, tables)

        un_cached_tables = set(self.tables_to_cache) - self.tables_cached
        if un_cached_tables:
            logger.log_warning('The following requested tables were NOT cached:')
            for table_name in un_cached_tables:
                logger.log_warning('\t%s' % table_name)
    def __init__(self, config):
        if 'estimation_database_configuration' in config:
            db_server = DatabaseServer(config['estimation_database_configuration'])
            db = db_server.get_database(config['estimation_database_configuration'].database_name)
        
            out_storage = StorageFactory().build_storage_for_dataset(
                type='sql_storage', storage_location=db)
        else:
            out_storage = StorageFactory().get_storage(type='flt_storage',
                storage_location=os.path.join(config['cache_directory'], str(config['base_year']+1)))

        simulation_state = SimulationState()
        simulation_state.set_cache_directory(config['cache_directory'])
        simulation_state.set_current_time(config['base_year'])
        attribute_cache = AttributeCache()
        
        SessionConfiguration(new_instance=True,
                             package_order=config['dataset_pool_configuration'].package_order,
                             in_storage=attribute_cache)
        
        if not os.path.exists(os.path.join(config['cache_directory'], str(config['base_year']))):
            #raise RuntimeError, "datasets uncached; run prepare_estimation_data.py first"
            CacheScenarioDatabase().run(config, unroll_gridcells=False)

        for dataset_name in config['datasets_to_preload']:
            SessionConfiguration().get_dataset_from_pool(dataset_name)

        households = SessionConfiguration().get_dataset_from_pool("household")
        household_ids = households.get_id_attribute()
        workers = households.get_attribute("workers")
        
        hh_ids = []
        member_ids = []
        is_worker = []
        job_ids = []

        for i in range(households.size()):  
            if workers[i] > 0:
                hh_ids += [household_ids[i]] * workers[i]
                member_ids += range(1, workers[i]+1)
                is_worker += [1] * workers[i]
                job_ids += [-1] * workers[i]

        in_storage = StorageFactory().get_storage('dict_storage')
        
        persons_table_name = 'persons'
        in_storage.write_table(
                table_name=persons_table_name,
                table_data={
                    'person_id':arange(len(hh_ids))+1,
                    'household_id':array(hh_ids),
                    'member_id':array(member_ids),
                    'is_worker':array(is_worker),                    
                    'job_id':array(job_ids),
                    },
            )

        persons = PersonDataset(in_storage=in_storage, in_table_name=persons_table_name)
        persons.write_dataset(out_storage=out_storage, out_table_name=persons_table_name)
Example no. 6
 def run(self, year, cache_directory=None):
     """The class is initialized with the appropriate configuration info from the 
     travel_model_configuration part of this config, and then copies the specified 
     UrbanSim data into files for daysim to read.
     The variables/expressions to export are defined in the node travel_model_configuration/urbansim_to_tm_variable_mapping
     of the configuration file.
     """
     if cache_directory is None:
         cache_directory = self.config['cache_directory']
     simulation_state = SimulationState()
     simulation_state.set_cache_directory(cache_directory)
     simulation_state.set_current_time(year)
     attribute_cache = AttributeCache()
     sc = SessionConfiguration(new_instance=True,
                               package_order=self.config['dataset_pool_configuration'].package_order,
                               in_storage=attribute_cache)
     dataset_pool = sc.get_dataset_pool()
     tm_config = self.config['travel_model_configuration']
     data_to_export = tm_config['urbansim_to_tm_variable_mapping']
     
     table_names = data_to_export.keys()
     variable_names = {}
     datasets = {}
     filenames = {}
     in_table_names = {}
     for table_name in table_names:
         filter = data_to_export[table_name].get('__filter__', None)
         if filter is not None:
             del data_to_export[table_name]['__filter__']
         out_table_name = data_to_export[table_name].get('__out_table_name__', None)
         if out_table_name is not None:
             del data_to_export[table_name]['__out_table_name__']
         else:
             out_table_name = table_name
         variables_to_export = map(lambda alias: "%s = %s" % (alias, data_to_export[table_name][alias]), data_to_export[table_name].keys())
         dataset_name = None            
         for var in variables_to_export:
             var_name = VariableName(var)
             if dataset_name is None:
                 dataset_name = var_name.get_dataset_name()
                 ds = dataset_pool.get_dataset(dataset_name)
                 
                 datasets[dataset_name] = ds
                 filenames[dataset_name] = out_table_name
                 in_table_names[dataset_name] = table_name
                 if dataset_name not in variable_names.keys():
                     variable_names[dataset_name] = []
             variable_names[dataset_name].append(var_name.get_alias())                
             ds.compute_variables([var_name], dataset_pool=dataset_pool)
         if filter is not None:
             filter_idx = where(ds.compute_variables(["__filter__ = %s" % filter], dataset_pool=dataset_pool)>0)[0]
             ds = DatasetSubset(ds, index = filter_idx)
             datasets[dataset_name] = ds
             
     return self._call_input_file_writer(year, datasets, in_table_names, filenames, variable_names, dataset_pool)
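A hedged illustration of the travel_model_configuration/urbansim_to_tm_variable_mapping node this method iterates over; the table names, aliases, and expressions below are hypothetical and only mirror the special '__filter__' and '__out_table_name__' keys handled above:

# Hypothetical sketch of the mapping consumed by the loop above (names illustrative only).
urbansim_to_tm_variable_mapping = {
    'households': {
        '__filter__': 'household.building_id > 0',    # optional row filter, removed before export
        '__out_table_name__': 'hh_for_travel_model',  # optional output table name
        'hhsize': 'household.persons',                # alias = expression to compute
        'income': 'household.income',
    },
    'jobs': {
        'sector': 'job.sector_id',
    },
}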
Example no. 7
    def run(self, base_directory, urbansim_cache_directory, years):
        """ run the simulation
                base_directory: directory containing all year folders of lccm.
                urbansim_cache_directory: directory containing all year folders of the urbansim cache.
                years: list of years to run."""
        model = LandCoverChangeModel(self.possible_lcts,
                                     submodel_string=self.lct_attribute,
                                     choice_attribute_name=self.lct_attribute,
                                     debuglevel=4)
        coefficients = Coefficients()
        storage = StorageFactory().get_storage('tab_storage',
                                               storage_location=os.path.join(
                                                   self.package_path, 'data'))
        coefficients.load(in_storage=storage,
                          in_table_name="land_cover_change_model_coefficients")
        specification = EquationSpecification(in_storage=storage)
        specification.load(
            in_table_name="land_cover_change_model_specification")
        specification.set_variable_prefix("biocomplexity.land_cover.")
        constants = Constants()
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(urbansim_cache_directory)
        attribute_cache = AttributeCache()
        index = arange(100000)
        for year in years:
            simulation_state.set_current_time(year)
            #land_cover_path = os.path.join(base_directory, str(year))
            land_cover_path = base_directory
            land_covers = LandCoverDataset(
                in_storage=StorageFactory().get_storage(
                    'flt_storage', storage_location=land_cover_path),
                out_storage=StorageFactory().get_storage(
                    'flt_storage', storage_location=land_cover_path),
                debuglevel=4)
            land_covers.subset_by_index(index)
            #land_covers.load_dataset()
            gridcells = GridcellDataset(in_storage=attribute_cache,
                                        debuglevel=4)

            agents_index = None
            model.run(specification,
                      coefficients,
                      land_covers,
                      data_objects={
                          "gridcell": gridcells,
                          "constants": constants,
                          "flush_variables": True
                      },
                      chunk_specification={'nchunks': 1})
            land_covers.flush_dataset()
            del gridcells
            del land_covers
Example no. 8
 def test_doing_refinements_from_specified_refinement_dataset(self):
     self.prepare_cache()
     os.system("%(python)s %(script)s -c %(cache_directory)s -s %(start_year)s -e %(end_year)s --refinements-directory=%(refinement_directory)s" % 
               {'python': sys.executable, 'script': self.script, 'cache_directory': self.cache_dir,
                'start_year': 2021, 'end_year': 2022,
                'refinement_directory': os.path.join(self.cache_dir, '2000')}
               )
     
     simulation_state = SimulationState()
     
     ## test refinement for 2021
     simulation_state.set_current_time(2021)
     jobs = self.dataset_pool.get_dataset('job')
     buildings = self.dataset_pool.get_dataset('building')
     jobs13_raz3 = jobs.compute_variables('numpy.logical_and(job.sector_id==13, job.disaggregate(parcel.raz_id==3, intermediates=[building]))', 
                                               dataset_pool=self.dataset_pool)
     jobs13_raz4 = jobs.compute_variables('numpy.logical_and(job.sector_id==13, job.disaggregate(parcel.raz_id, intermediates=[building])==4)', 
                                               dataset_pool=self.dataset_pool)
     jobs13_raz5 = jobs.compute_variables('numpy.logical_and(job.sector_id==13, job.disaggregate(parcel.raz_id, intermediates=[building])==5 )', 
                                               dataset_pool=self.dataset_pool)
     jobs_raz5 = jobs.compute_variables('job.disaggregate(parcel.raz_id, intermediates=[building])==5', 
                                             dataset_pool=self.dataset_pool)
     
     #check results
     self.assertEqual(jobs13_raz3.sum(), 0)
     self.assertEqual(jobs13_raz4.sum(), 0)
     self.assertEqual(jobs13_raz5.sum() >= 5, True)
     self.assertEqual(jobs_raz5.sum(), 7)
     expected_nr_sqft = array([6, 0, 3, 6, 1, 6, 5, 0])
     ## was             array([6, 2, 3, 6, 1, 2, 5, 0]),
     self.assert_(allclose(buildings.get_attribute('non_residential_sqft'),  expected_nr_sqft))
     
     self.dataset_pool.remove_all_datasets()
     
     
     ## test refinement for 2022
     simulation_state.set_current_time(2022)
     hhs = self.dataset_pool.get_dataset('household')
     buildings = self.dataset_pool.get_dataset('building')
     
     hhs_raz6 = hhs.compute_variables('household.disaggregate(building.disaggregate(parcel.raz_id)==6)', 
                                           dataset_pool=self.dataset_pool)
     hhs_bldg = buildings.compute_variables('building.number_of_agents(household)', 
                                                 dataset_pool=self.dataset_pool)
     
     #check results
     self.assertEqual(hhs_raz6.sum(), 7)
     self.assert_(hhs_bldg.sum(),  7 )
     self.assert_((hhs_bldg!=0).sum(),  2)
     self.assert_(buildings.get_attribute('residential_units').sum(),  7)
     
     self.dataset_pool.remove_all_datasets()        
Example no. 9
 def run(self, year, skim_directory=None):
     """ It gets the appropriate values from the 
     travel_model_configuration part of this config, and then copies the specified 
     data into the specified travel_data variable names.  Results in
     a new travel_data cache for year+1.
     """
     cache_directory = self.config['cache_directory']
     simulation_state = SimulationState()
     simulation_state.set_current_time(year)
     simulation_state.set_cache_directory(cache_directory)
     
     year_config = self.config['travel_model_configuration'][year]
     self.write_travel_data(year, cache_directory)
Example no. 10
    def run(self, year):
        """This is the main entry point.  The class is initialized with the appropriate configuration info from the 
        travel_model_configuration part of this config, and then copies the specified 
        UrbanSim data into files for emme/2 to read.  
        If households and jobs do not have a primary attribute zone_id, the entry 'locations_to_disaggregate'
        in the travel_model_configuration should be a list of dataset names over which the zone_id 
        will be disaggregated, ordered from higher to lower aggregation level, e.g. ['parcel', 'building']
        """
        cache_directory = self.config['cache_directory']
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(cache_directory)
        simulation_state.set_current_time(year)
        attribute_cache = AttributeCache()
        sc = SessionConfiguration(
            new_instance=True,
            package_order=self.config['dataset_pool_configuration'].
            package_order,
            in_storage=attribute_cache)
        dataset_pool = sc.get_dataset_pool()

        hh_set = dataset_pool.get_dataset('household')
        zone_set = dataset_pool.get_dataset('zone')
        job_set = dataset_pool.get_dataset('job')
        locations_to_disaggregate = self.config['travel_model_configuration'][
            'locations_to_disaggregate']
        len_locations_to_disaggregate = len(locations_to_disaggregate)
        if len_locations_to_disaggregate > 0:
            primary_location = locations_to_disaggregate[0]
            if len_locations_to_disaggregate > 1:
                intermediates_string = ", intermediates=["
                for i in range(1, len_locations_to_disaggregate):
                    intermediates_string = "%s%s, " % (
                        intermediates_string, locations_to_disaggregate[i])
                intermediates_string = "%s]" % intermediates_string
            else:
                intermediates_string = ""
            hh_set.compute_variables([
                '%s = household.disaggregate(%s.%s %s)' %
                (zone_set.get_id_name()[0], primary_location,
                 zone_set.get_id_name()[0], intermediates_string)
            ],
                                     dataset_pool=dataset_pool)
            job_set.compute_variables([
                '%s = job.disaggregate(%s.%s %s)' %
                (zone_set.get_id_name()[0], primary_location,
                 zone_set.get_id_name()[0], intermediates_string)
            ],
                                      dataset_pool=dataset_pool)

        return self._call_input_file_writer(year, dataset_pool)
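For clarity, a trace of the expression string assembled above under assumed inputs (not taken from any actual configuration):

# Assumed inputs: locations_to_disaggregate = ['parcel', 'building'] and
# zone_set.get_id_name()[0] == 'zone_id'.
# Then intermediates_string becomes ", intermediates=[building, ]" and the
# household expression passed to compute_variables is:
#   "zone_id = household.disaggregate(parcel.zone_id , intermediates=[building, ])"
# i.e. zone_id is read from parcels and carried down to households via buildings.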
Example no. 11
    def target_func(self,
                    est_v,
                    func=lambda x, y: np.sum(np.abs(x - y)),
                    **kwargs):
        ''' Target function.'''

        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(
            new_instance=True,
            package_order=self.package_order,
            in_storage=attribute_cache).get_dataset_pool()

        calib_datasets = {}
        for dataset_name, calib_attr in calib_datasets.iteritems():
            dataset = dataset_pool.get_dataset(
                dataset_name, dataset_arguments={'id_name': []})
            assert subset is None or subset.get(dataset_name, None) is None or \
                   subset_patterns is None or subset_patterns.get(dataset_name, None) is None
            if subset is not None and subset.get(dataset_name,
                                                 None) is not None:
                subset_attr, subset_cond = subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif subset_patterns is not None and subset_patterns.get(
                    dataset_name, None) is not None:
                subset_attr, subset_pattern = subset_patterns.get(dataset_name)
                index = array([
                    True if re.search(subset_pattern, attr_v) else False
                    for attr_v in dataset[subset_attr]
                ])
            else:
                index = arange(dataset.size(), dtype='i')

            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        prediction = self.update_prediction(est_v, simulation_state,
                                            dataset_pool, calib_datasets,
                                            **kwargs)
        ## allow keys in target not appearing in prediction
        ## assuming their values to be 0
        ### every key in target should appear in prediction
        #assert np.all( np.in1d(self.target.keys(), prediction.keys()) )
        target = np.array(self.target.values())
        predct = np.array([prediction[k] if prediction.has_key(k) else 0 \
                           for k in self.target.keys() ])
        results = func(predct, target)

        return results
Example no. 12
def setup_environment(cache_directory, year, package_order, additional_datasets={}):
    gc.collect()
    ss = SimulationState(new_instance=True)
    ss.set_cache_directory(cache_directory)
    ss.set_current_time(year)
    ac = AttributeCache()
    storage = ac.get_flt_storage_for_year(year)
    sc = SessionConfiguration(new_instance=True,
                         package_order=package_order,
                         in_storage=ac)
    logger.log_status("Setup environment for year %s. Use cache directory %s." % (year, storage.get_storage_location()))
    dp = sc.get_dataset_pool()
    for name, ds in additional_datasets.iteritems():
        dp.replace_dataset(name, ds)
    return dp
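A minimal usage sketch of the helper above; the cache path, year, and package order are placeholder assumptions, not values from the source:

# Hypothetical call; adjust the cache directory, year and package order to your run.
dp = setup_environment('/path/to/urbansim_cache', 2010, ['urbansim', 'opus_core'])
households = dp.get_dataset('household')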
Example no. 13
 def run(self, year):
     """Like its parent, but report files have different format and there are no banks.
     Zones are assumed to have no gaps.
     """
     cache_directory = self.config['cache_directory']
     simulation_state = SimulationState()
     simulation_state.set_current_time(year)
     simulation_state.set_cache_directory(cache_directory)
     
     year_config = self.config['travel_model_configuration'][year]
     
     bank_path = os.path.sep.join([self.get_emme2_base_dir()] + self.config['travel_model_configuration'][year]['bank'])
     for path, variable_dict in year_config['matrix_variable_map'].iteritems():
         path_name = os.path.sep.join([bank_path] + path.split('.'))
         self.get_needed_matrices_from_emme4(year, 
                                             year_config['cache_directory'],
                                             path_name, variable_dict)
    def run(self, year):
        """This is the main entry point.  The class is initialized with the appropriate configuration info from the 
        travel_model_configuration part of this config, and then copies the specified 
        UrbanSim data into files for emme/2 to read.  
        If households and jobs do not have a primary attribute zone_id, the entry 'locations_to_disaggregate'
        in the travel_model_configuration should be a list of dataset names over which the zone_id 
        will be disaggregated, ordered from higher to lower aggregation level, e.g. ['parcel', 'building']
        """
        cache_directory = self.config['cache_directory']
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(cache_directory)
        simulation_state.set_current_time(year)
        attribute_cache = AttributeCache()
        sc = SessionConfiguration(new_instance=True,
                                  package_order=self.config['dataset_pool_configuration'].package_order,
                                  in_storage=attribute_cache)
        dataset_pool = sc.get_dataset_pool()

        hh_set = dataset_pool.get_dataset('household')
        zone_set = dataset_pool.get_dataset('zone')
        job_set = dataset_pool.get_dataset('job')
        locations_to_disaggregate = self.config['travel_model_configuration']['locations_to_disaggregate']
        len_locations_to_disaggregate = len(locations_to_disaggregate)
        if len_locations_to_disaggregate > 0:
            primary_location = locations_to_disaggregate[0]
            if len_locations_to_disaggregate > 1:
                intermediates_string = ", intermediates=["
                for i in range(1, len_locations_to_disaggregate):
                    intermediates_string = "%s%s, " % (intermediates_string, locations_to_disaggregate[i])
                intermediates_string = "%s]" % intermediates_string
            else:
                intermediates_string = ""
            hh_set.compute_variables(['%s = household.disaggregate(%s.%s %s)' % (zone_set.get_id_name()[0],
                                                                                 primary_location, zone_set.get_id_name()[0],
                                                                                 intermediates_string)], 
                                      dataset_pool=dataset_pool)
            job_set.compute_variables(['%s = job.disaggregate(%s.%s %s)' % (zone_set.get_id_name()[0],
                                                                            primary_location, zone_set.get_id_name()[0],
                                                                            intermediates_string)], 
                                       dataset_pool=dataset_pool)
        
        return self._call_input_file_writer(year, dataset_pool)
Example no. 15
 def run(self, year):
     """Like its parent, but skims are stored locally in matrix_directory in hdf5 format.
      There is one file per year, called xxxx-travelmodel.h5, where xxxx is the year. 
     Each file has one group per bank, e.g. Bank1, which contains the matrices.
     Zones are assumed to have no gaps.
     """
     cache_directory = self.config['cache_directory']
     simulation_state = SimulationState()
     simulation_state.set_current_time(year)
     simulation_state.set_cache_directory(cache_directory)
     
     year_config = self.config['travel_model_configuration'][year]
     
     bank_path = os.path.sep.join([self.get_emme2_base_dir()] + self.config['travel_model_configuration'][year]['bank'])
     #bank_file = os.path.join(matrix_directory, "%s-travelmodel.h5" % bank_year)
     for path, variable_dict in year_config['matrix_variable_map'].iteritems():
         path_name = os.path.sep.join([bank_path] + path.split('.'))
         self.get_needed_matrices_from_emme4(year, 
                                             year_config['cache_directory'],
                                             path_name, variable_dict)
Example no. 16
def setup_environment(cache_directory,
                      year,
                      package_order,
                      additional_datasets={}):
    gc.collect()
    ss = SimulationState(new_instance=True)
    ss.set_cache_directory(cache_directory)
    ss.set_current_time(year)
    ac = AttributeCache()
    storage = ac.get_flt_storage_for_year(year)
    sc = SessionConfiguration(new_instance=True,
                              package_order=package_order,
                              in_storage=ac)
    logger.log_status(
        "Setup environment for year %s. Use cache directory %s." %
        (year, storage.get_storage_location()))
    dp = sc.get_dataset_pool()
    for name, ds in additional_datasets.iteritems():
        dp.replace_dataset(name, ds)
    return dp
    def target_func(self, est_v, func=lambda x, y: np.sum(np.abs(x - y)), **kwargs):
        """ Target function."""

        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(
            new_instance=True, package_order=self.package_order, in_storage=attribute_cache
        ).get_dataset_pool()

        calib_datasets = {}
        for dataset_name, calib_attr in calib_datasets.iteritems():
            dataset = dataset_pool.get_dataset(dataset_name, dataset_arguments={"id_name": []})
            assert (
                subset is None
                or subset.get(dataset_name, None) is None
                or subset_patterns is None
                or subset_patterns.get(dataset_name, None) is None
            )
            if subset is not None and subset.get(dataset_name, None) is not None:
                subset_attr, subset_cond = subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif subset_patterns is not None and subset_patterns.get(dataset_name, None) is not None:
                subset_attr, subset_pattern = subset_patterns.get(dataset_name)
                index = array([True if re.search(subset_pattern, attr_v) else False for attr_v in dataset[subset_attr]])
            else:
                index = arange(dataset.size(), dtype="i")

            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        prediction = self.update_prediction(est_v, simulation_state, dataset_pool, calib_datasets, **kwargs)
        ## allow keys in target not appearing in prediction
        ## assuming their values to be 0
        ### every key in target should appear in prediction
        # assert np.all( np.in1d(self.target.keys(), prediction.keys()) )
        target = np.array(self.target.values())
        predct = np.array([prediction[k] if prediction.has_key(k) else 0 for k in self.target.keys()])
        results = func(predct, target)

        return results
Example no. 18
    def run(self, year, matrix_directory=None):
        """This is the main entry point.  It gets the appropriate values from the 
        travel_model_configuration part of this config, and then copies the specified 
        emme/2 matrices into the specified travel_data variable names.  Results in
        a new travel_data cache for year+1.
        If matrix_directory is not None, it is assumed the matrices files are already created 
        in the given directory.
        """
        cache_directory = self.config['cache_directory']
        simulation_state = SimulationState()
        simulation_state.set_current_time(year)
        simulation_state.set_cache_directory(cache_directory)

        year_config = self.config['travel_model_configuration'][year]
        matrices_created = False
        if matrix_directory is not None:
            matrices_created = True
        reports = self.config['travel_model_configuration'].get(
            'reports_to_copy', [])

        for x in 1, 2, 3:
            if matrix_directory is None:
                bank_dir = self.get_emme2_dir(year, "bank%i" % x)
            else:
                bank_dir = os.path.join(matrix_directory, "bank%i" % x)
            if "bank%i" % x in year_config['matrix_variable_map']:
                self.get_needed_matrices_from_emme2(
                    year, year_config['cache_directory'], bank_dir,
                    year_config['matrix_variable_map']["bank%i" % x],
                    matrices_created)
                for report in reports:
                    self.copy_report_to_cache(report, year,
                                              year_config['cache_directory'],
                                              bank_dir)
            if "bank%i" % x in year_config.get('node_matrix_variable_map', {}):
                node_variable_map = year_config['node_matrix_variable_map'][
                    "bank%i" % x]
                if len(node_variable_map.keys()) > 0:
                    self.get_needed_node_matrices_from_emme2(
                        year, year_config['cache_directory'], bank_dir,
                        node_variable_map)
Example no. 19
def import_travel_model_data(config, year):

    cache_directory = config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_current_time(year)
    simulation_state.set_cache_directory(cache_directory)
    out_store = AttributeCache().get_flt_storage_for_year(year+1)
    out_store_loc = out_store.get_storage_location()

    tm_config = config['travel_model_configuration']
    data_to_import = tm_config['tm_to_urbansim_variable_mapping'] 
    base_dir = mtc_common.tm_get_base_dir(config)
    data_dir = tm_config[year]['data_dir']

    for dataset_name, skim_file in data_to_import.iteritems():
        skim_file = os.path.join(base_dir, data_dir, skim_file)
        data = read_csv(skim_file, header=0)
        
        with block("Caching {} to {}".format(dataset_name, out_store_loc)):
            logger.log_status("Source file {}".format(skim_file))
            opus_ds = to_opus_dataset(data, out_store, dataset_name)
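A hedged sketch of the shape of tm_to_urbansim_variable_mapping assumed by the loop above; dataset names and file paths are illustrative only:

# Hypothetical mapping: dataset name -> CSV skim file, relative to base_dir/data_dir.
tm_to_urbansim_variable_mapping = {
    'travel_data': 'skims/travel_data.csv',
    'zone_summaries': 'landuse/zone_summaries.csv',
}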
Example no. 20
 def test_doing_other_refinements(self):
     self.prepare_cache()
     os.system("%(python)s  %(script)s -c %(cache_directory)s -s %(start_year)s -e %(end_year)s" % 
               {'python': sys.executable, 'script': self.script, 'cache_directory':self.cache_dir,
                'start_year': 2023, 'end_year': 2027 }
               )        
     
     
     simulation_state = SimulationState()
             
     ## test refinement for 2023
     simulation_state.set_current_time(2023)
     hhs = self.dataset_pool.get_dataset('household')
     
     hhs_p5 = hhs.compute_variables('household.persons>5')
     
     #check results
     self.assert_(hhs.size(),  2)
     self.assertEqual(hhs_p5.sum(), 0)
     
     self.dataset_pool.remove_all_datasets()
     
     ## test refinement for 2027
     simulation_state.set_current_time(2027)
     hhs = self.dataset_pool.get_dataset('household')
     buildings = self.dataset_pool.get_dataset('building')
     persons = self.dataset_pool.get_dataset('person')
     
     
     hhs_raz6 = hhs.compute_variables('household.disaggregate(building.disaggregate(parcel.raz_id)==6)', 
                                           dataset_pool=self.dataset_pool)
     hhs_bldg = buildings.compute_variables('building.number_of_agents(household)', 
                                                 dataset_pool=self.dataset_pool)
     
     #check results
     self.assertEqual(hhs_raz6.sum(), 3)
     self.assert_(hhs_bldg.sum(),  3 )
     self.assert_((hhs_bldg!=0).sum(),  2)
     self.assert_(allclose(persons.get_attribute('job_id'), array([-1,  -1, -1, -1,  3,  4,  7])))
Example no. 21
 def run(self, year):
     """
     Copy skims stored in hdf5 format into the UrbanSim cache.
     
     Should run after psrc_parcel.emme.models.run_export_skims which creates the skims hdf5 file.
     It creates a travel_model dataset with each skim being an attribute of it. 
     Zones are assumed to have no gaps.
     
     Arguments:
     year -- year of the urbansim run. Used to extract the TM year from the bank configuration.
     
     Configuration entries (in travel_model_configuration) used:
     matrix_variable_map -- dictionary of bank names and corresponding skim names.
             Bank names are the path where (back-)slashes are replaced by dots, e.g. skims.auto.am.
             A value for each of such bank name is a dictionary with keys being skim names and 
             values being the desired urbansim attribute name. E.g.
             {'skims.nonmotorized.am':
                   {'abketm': 'am_bike_to_work_travel_time',
                    'awlktm': 'am_walk_time_in_minutes'
                   }
             }
     matrix_h5_directory -- path to the hdf5 file called xxxx-travelmodel.h5  
             where xxxx is replaced by the TM year (default is the Emme base directory), 
             which contains the skims as n x n matrices.
     """
     cache_directory = self.config['cache_directory']
     simulation_state = SimulationState()
     simulation_state.set_current_time(year)
     simulation_state.set_cache_directory(cache_directory)
     
     tmconfig = self.config['travel_model_configuration']
     year_config = tmconfig[year]
     matrix_directory = tmconfig.get('matrix_h5_directory', self.get_emme2_base_dir())        
     bank_year = tmconfig[year]['bank'][0]
     bank_file = os.path.join(matrix_directory, "%s-travelmodel.h5" % bank_year)
     for path, variable_dict in year_config['matrix_variable_map'].iteritems():
         self.get_needed_matrices_from_emme4(year, 
                                             year_config['cache_directory'],
                                             path, variable_dict, bank_file=bank_file)
    def run(self, config, year, *args, **kwargs):
        """This is the main entry point.  It gets the appropriate values from the 
        travel_model_configuration part of this config, and then copies the specified 
        data into the specified travel_data variable names.  Results in
        a new travel_data cache for year+1.
        """
        cache_directory = config['cache_directory']
        simulation_state = SimulationState()
        simulation_state.set_current_time(year)
        simulation_state.set_cache_directory(cache_directory)
        
        logger.start_block('Getting data from travel model')
        next_year = year + 1
        flt_dir_for_next_year = os.path.join(cache_directory, str(next_year))
        if not os.path.exists(flt_dir_for_next_year):
            os.mkdir(flt_dir_for_next_year)
        attribute_cache = AttributeCache()        
        dataset_pool = SessionConfiguration(new_instance=True,
                                            package_order=config['dataset_pool_configuration'].package_order,
                                            in_storage=attribute_cache).get_dataset_pool()
        zone_set = dataset_pool.get_dataset('zone')

#        zone_set = ZoneDataset(in_storage_location=flt_dir_for_this_year, 
#                               in_storage_type='flt_storage', 
#                               in_table_name='zones')
        zone_set.load_dataset()
        self.prepare_for_run(config['travel_model_configuration'], year)
        travel_data_set = self.get_travel_data_from_travel_model(config, year, zone_set, 
                                                                 *args, **kwargs)
        logger.end_block()
        
        logger.start_block('Writing travel data to cache')
        out_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_dir_for_next_year)
        #out_storage = flt_storage(Resources(data={"storage_location":flt_dir_for_next_year}))
        travel_data_set.write_dataset(attributes=travel_data_set.get_known_attribute_names(), 
                                      out_storage=out_storage, 
                                      out_table_name='travel_data')
        logger.end_block()
Example no. 23
    def run(self, base_directory, urbansim_cache_directory, years):
        """ run the simulation
                base_directory: directory containing all year folders of lccm.
                urbansim_cache_directory: directory containing all year folders of the urbansim cache.
                years: list of years to run."""
        model = LandCoverChangeModel(self.possible_lcts, submodel_string=self.lct_attribute, 
                                     choice_attribute_name= self.lct_attribute, debuglevel=4)
        coefficients = Coefficients()
        storage = StorageFactory().get_storage('tab_storage', 
            storage_location=os.path.join(self.package_path, 'data'))
        coefficients.load(in_storage=storage, in_table_name="land_cover_change_model_coefficients")
        specification = EquationSpecification(in_storage=storage)
        specification.load(in_table_name="land_cover_change_model_specification")
        specification.set_variable_prefix("biocomplexity.land_cover.")
        constants = Constants()
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(urbansim_cache_directory)
        attribute_cache = AttributeCache()
        index = arange(100000)
        for year in years:
            simulation_state.set_current_time(year)
            #land_cover_path = os.path.join(base_directory, str(year))
            land_cover_path = base_directory
            land_covers = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path),
                                       out_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path),
                                       debuglevel=4)
            land_covers.subset_by_index(index)
            #land_covers.load_dataset()
            gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4)

            agents_index = None
            model.run(specification, coefficients, land_covers, data_objects={"gridcell":gridcells,
                          "constants":constants, "flush_variables":True},
                          chunk_specification = {'nchunks':1}
                          )
            land_covers.flush_dataset()
            del gridcells
            del land_covers
    def run(self, config, year, *args, **kwargs):
        """This is the main entry point.  It gets the appropriate configuration info from the 
        travel_model_configuration part of this config, and then copies the specified 
        UrbanSim data into files for the travel model to read.  
        """
        cache_directory = config['cache_directory']
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(cache_directory)
        simulation_state.set_current_time(year)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(
            new_instance=True,
            package_order=config['dataset_pool_configuration'].package_order,
            in_storage=attribute_cache).get_dataset_pool()

        #cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
        #datasets = DatasetFactory().create_datasets_from_flt(config.get('datasets_to_preload',{}),
        #"urbansim",
        #additional_arguments={'in_storage': attribute_cache})
        zone_set = dataset_pool.get_dataset('zone')
        self.prepare_for_run(config['travel_model_configuration'], year)
        self.create_travel_model_input_file(config, year, zone_set,
                                            dataset_pool, *args, **kwargs)
Example no. 25
    def run(self, config, show_output=False):
        logger.log_status("Caching large SQL tables to: " +
                          config['cache_directory'])
        self.show_output = show_output

        #import pydevd;pydevd.settrace()

        server_configuration = config['scenario_database_configuration']

        scenario_database_manager = ScenarioDatabaseManager(
            server_configuration=server_configuration,
            base_scenario_database_name=server_configuration.database_name)

        self.database_server = DatabaseServer(server_configuration)

        database_to_table_mapping = scenario_database_manager.get_database_to_table_mapping(
        )

        self.tables_to_cache = config[
            'creating_baseyear_cache_configuration'].tables_to_cache

        simulation_state = SimulationState()
        if 'low_memory_run' in config:
            simulation_state.set_low_memory_run(config['low_memory_run'])
        simulation_state.set_cache_directory(config['cache_directory'])
        simulation_state.set_current_time(config['base_year'])

        self.tables_cached = set()
        for database_name, tables in database_to_table_mapping.items():
            self.cache_database_tables(config, database_name, tables)

        un_cached_tables = set(self.tables_to_cache) - self.tables_cached
        if un_cached_tables:
            logger.log_warning(
                'The following requested tables were NOT cached:')
            for table_name in un_cached_tables:
                logger.log_warning('\t%s' % table_name)
Example no. 26
    def run(self, table_names, out_storage=None, table_name_pattern=None, cache_directory=None, year=None, **kwargs):
        """
        export specified tables to database

        table_name_pattern: For example '{table_name}_{scenario_name}_{year}'
        """
        if not hasattr(self, "out_storage"):
            if out_storage is None:
                raise ValueError, "Either the out_storage argument must be specified or prepare_for_run must be called before run to create a valid out_storage."
            else:
                self.out_storage = out_storage
        sim_state = SimulationState()
        if sim_state.get_current_time() == 0:
            sim_state.set_current_time(9999)
        if cache_directory is None:
            cache_directory = sim_state.get_cache_directory()

        attr_cache = AttributeCache(cache_directory=cache_directory)
        if year is None:
            years = attr_cache._get_sorted_list_of_years()
        else:
            assert isinstance(year, int)
            years = [year]

        for table_name in table_names:
            kwargs["table_name"] = table_name
            for year in years:
                kwargs["year"] = year
                out_table_name = table_name_pattern.format(**kwargs)
                in_storage = attr_cache.get_flt_storage_for_year(year)
                # cache_path = os.path.join(cache_directory, str(year))
                # in_storage = flt_storage(storage_location=cache_path)
                # TODO drop_table(table_name) if table_name exists
                ExportStorage().export_dataset(
                    table_name, in_storage=in_storage, out_storage=self.out_storage, out_dataset_name=out_table_name
                )
        self.post_run(kwargs["scenario_name"], years)
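A hedged usage sketch of the export method above; 'exporter', the storage object, and the scenario name are assumptions for illustration:

# Hypothetical invocation: export two cached tables for every cached year,
# producing output tables named e.g. 'households_myrun_2010'.
exporter.run(table_names=['households', 'jobs'],
             out_storage=my_sql_storage,   # created elsewhere, e.g. via prepare_for_run
             table_name_pattern='{table_name}_{scenario_name}_{year}',
             scenario_name='myrun')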
 def run(self, year, matrix_directory=None):
     """This is the main entry point.  It gets the appropriate values from the 
     travel_model_configuration part of this config, and then copies the specified 
     emme/2 matrices into the specified travel_data variable names.  Results in
     a new travel_data cache for year+1.
     If matrix_directory is not None, it is assumed the matrices files are already created 
     in the given directory.
     """
     cache_directory = self.config['cache_directory']
     simulation_state = SimulationState()
     simulation_state.set_current_time(year)
     simulation_state.set_cache_directory(cache_directory)
     
     year_config = self.config['travel_model_configuration'][year]
     matrices_created = False
     if matrix_directory is not None:
         matrices_created = True    
     reports = self.config['travel_model_configuration'].get('reports_to_copy', [])
     
     for x in 1,2,3:
         if matrix_directory is None:
             bank_dir = self.get_emme2_dir(year, "bank%i" % x)
         else:
             bank_dir = os.path.join(matrix_directory, "bank%i" % x)
         if "bank%i" % x in year_config['matrix_variable_map']:
             self.get_needed_matrices_from_emme2(year, 
                                             year_config['cache_directory'],
                                             bank_dir,
                                             year_config['matrix_variable_map']["bank%i" % x],
                                                 matrices_created)
             for report in reports:
                 self.copy_report_to_cache(report, year, year_config['cache_directory'], bank_dir)
         if "bank%i" % x in year_config.get('node_matrix_variable_map', {}):
             node_variable_map = year_config['node_matrix_variable_map']["bank%i" % x]
             if len(node_variable_map.keys()) > 0:
                 self.get_needed_node_matrices_from_emme2(year, year_config['cache_directory'], bank_dir, node_variable_map)
Example no. 28
class TestLagVariables(opus_unittest.OpusTestCase):
    def setUp(self):
        self.config = TestCacheConfiguration()

        self.simulation_state = SimulationState(new_instance=True)
        SessionConfiguration(self.config,
                             new_instance=True,
                             package_order=['urbansim', 'opus_core'],
                             in_storage=AttributeCache())

        self.base_year = self.config['base_year']
        creating_baseyear_cache_configuration = self.config[
            'creating_baseyear_cache_configuration']

        self.simulation_state.set_current_time(self.base_year)

        cache_directory = self.simulation_state.get_cache_directory()
        copytree(
            os.path.join(
                creating_baseyear_cache_configuration.baseyear_cache.
                existing_cache_to_copy, str(self.base_year)),
            os.path.join(cache_directory, str(self.base_year)))
        cacher = CacheScenarioDatabase()
        cacher.prepare_data_before_baseyear(
            cache_directory, self.base_year,
            creating_baseyear_cache_configuration)

        self.config['cache_directory'] = cache_directory

        cache_storage = AttributeCache().get_flt_storage_for_year(
            self.base_year)
        cache_directory = self.simulation_state.get_cache_directory()
        flt_directory = os.path.join(cache_directory, str(self.base_year))
        self.gridcell = DatasetFactory().get_dataset(
            'gridcell',
            package='urbansim',
            subdir='datasets',
            arguments={
                'in_storage':
                StorageFactory().get_storage('flt_storage',
                                             storage_location=flt_directory)
            })

    def tearDown(self):
        self.simulation_state.remove_singleton(delete_cache=True)

    def test_lag_variables(self):
        """Test lag variables"""
        # A weak test that computing a lag variable on a realistic dataset does not crash.
        self.gridcell.compute_variables(
            'urbansim.gridcell.n_recent_transitions_to_developed',
            resources=self.config)

        # The following tests are fragile, since they need to know exactly what values are being
        # subtracted, and ignore any negative amount that is truncated at zero.
        # If you change the "subset" dataset to a different region, you will
        # have to update the expected value.
        self.gridcell.compute_variables('urbansim.gridcell.commercial_sqft',
                                        resources=self.config)
        self.gridcell.compute_variables(
            'urbansim.gridcell.commercial_sqft_lag1', resources=self.config)
        self.gridcell.compute_variables(
            'urbansim.gridcell.commercial_sqft_lag2', resources=self.config)

        sqft = self.gridcell.get_attribute('commercial_sqft').sum()
        sqft_lag1 = self.gridcell.get_attribute('commercial_sqft_lag1').sum()
        sqft_lag2 = self.gridcell.get_attribute('commercial_sqft_lag2').sum()

        logger.log_status('sqft = %s' % sqft)
        logger.log_status('sqft_lag1 = %s' % sqft_lag1)
        logger.log_status('sqft_lag2 = %s' % sqft_lag2)
        logger.log_status('base_year = %s' % self.base_year)

        self.assertEqual(self.base_year, SimulationState().get_current_time())
        self.assertEqual(sqft, sqft_lag1)
        self.assertEqual(578 + 2083 + 1103 + 87, sqft_lag1 - sqft_lag2)

        # Do lag variables produce different results for derived attributes?
        self.gridcell.compute_variables(
            'urbansim.gridcell.n_recent_development_projects',
            resources=self.config)
        self.gridcell.compute_variables(
            'urbansim.gridcell.n_recent_development_projects_lag1',
            resources=self.config)
        n_recent_projects = self.gridcell.get_attribute(
            'n_recent_development_projects').sum()
        n_recent_projects_lag1 = self.gridcell.get_attribute(
            'n_recent_development_projects_lag1').sum()

        self.assertEqual(n_recent_projects, 11)
        self.assertEqual(n_recent_projects_lag1, 15)

        # Do lag_variables produce different results for derived attributes without lags?
        self.gridcell.compute_variables('urbansim.gridcell.ln_commercial_sqft',
                                        resources=self.config)
        self.gridcell.compute_variables(
            'urbansim.gridcell.ln_commercial_sqft_lag4', resources=self.config)
        sqft = self.gridcell.get_attribute('ln_commercial_sqft').sum()
        sqft_lag4 = self.gridcell.get_attribute(
            'ln_commercial_sqft_lag4').sum()

        self.assertNotEqual(sqft, sqft_lag4)
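The last two assertions above check that a lag applies to the attribute's history, not to its current value: ln_commercial_sqft_lag4 is the log of the square footage cached four years earlier. A hedged numpy sketch of that distinction on toy data follows; the +1 inside the log and the numbers are assumptions for illustration, not the urbansim variable definitions.

from numpy import array, log

sqft_now      = array([1000., 2500., 0.])   # toy current-year commercial sqft
sqft_4yrs_ago = array([ 800., 2500., 0.])   # toy cached values from four years earlier

ln_now  = log(sqft_now + 1)        # illustrative transform only
ln_lag4 = log(sqft_4yrs_ago + 1)   # lag: same transform, older data

# The sums differ whenever the history differs, which is what assertNotEqual checks.
print('%s vs %s' % (ln_now.sum(), ln_lag4.sum()))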
Esempio n. 29
0
    def run(self,
            optimizer='lbfgsb',
            results_pickle_prefix="calib",
            optimizer_kwargs={}):
        ''' Call the specified optimizer to calibrate.

        Arguments:
            - optimizer: optimization method to use ('bfgs', 'lbfgsb', 'anneal', or 'panneal')
            - results_pickle_prefix: prefix of the pickle file saved after the simulation; if None, results are not saved

        Returns:
            - the results from the optimizer
            - a pickle dump of the results in the log_directory, if results_pickle_prefix is specified
        '''

        simulation_state = SimulationState()
        simulation_state.set_current_time(self.base_year)
        simulation_state.set_cache_directory(self.cache_directory)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(
            new_instance=True,
            package_order=self.package_order,
            in_storage=attribute_cache).get_dataset_pool()

        calib_datasets = {}
        # NOTE: the calibration spec is assumed to live on self.calib_datasets
        # (iterating the empty dict just created would be a no-op); subset and
        # subset_patterns are likewise assumed to be defined elsewhere.
        for dataset_name, calib_attr in self.calib_datasets.iteritems():
            dataset = dataset_pool.get_dataset(
                dataset_name, dataset_arguments={'id_name': []})
            assert subset is None or subset.get(dataset_name, None) is None or \
                   subset_patterns is None or subset_patterns.get(dataset_name, None) is None
            if subset is not None and subset.get(dataset_name,
                                                 None) is not None:
                subset_attr, subset_cond = subset.get(dataset_name)
                index = np.in1d(dataset[subset_attr], subset_cond)
            elif subset_patterns is not None and subset_patterns.get(
                    dataset_name, None) is not None:
                subset_attr, subset_pattern = subset_patterns.get(dataset_name)
                index = array([
                    True if re.search(subset_pattern, attr_v) else False
                    for attr_v in dataset[subset_attr]
                ])
            else:
                index = arange(dataset.size(), dtype='i')

            calib_datasets[dataset_name] = [dataset, calib_attr, index]

        init_v = array([], dtype='f8')
        for dataset_name, calib in calib_datasets.iteritems():
            dataset, calib_attr, index = calib
            if type(calib_attr) == str:
                init_v = np.concatenate((init_v, dataset[calib_attr][index]))
            elif type(calib_attr) in (list, tuple):
                for attr in calib_attr:
                    init_v = np.concatenate((init_v, dataset[attr][index]))
            else:
                raise TypeError, "Unrecongized data type in calib_datasets"

        t0 = time.time()

        if is_parallelizable == True: set_parallel(True)

        print OKBLUE + "\noptimizer = {} (is_parallel = {})".format(
            optimizer, is_parallelizable) + ENDC
        print OKBLUE + "-------------------------------------------------------\n" + ENDC
        if optimizer == 'bfgs':
            default_kwargs = {
                'fprime': None,
                'epsilon': 1e-08,
                'maxiter': None,
                'full_output': 1,
                'disp': 1,
                'retall': 0,
                'callback': None
            }
            optimizer_func = fmin_bfgs
        elif optimizer == 'lbfgsb':
            default_kwargs = {
                'fprime': None,
                'approx_grad': True,
                'bounds': None,
                'factr': 1e12,
                'iprint': 1
            }

            optimizer_func = fmin_l_bfgs_b
        elif optimizer == 'anneal':
            default_kwargs = {
                'schedule': 'fast',
                'full_output': 1,
                'T0': None,
                'Tf': 1e-12,
                'maxeval': None,
                'maxaccept': None,
                'maxiter': 400,
                'boltzmann': 1.0,
                'learn_rate': 0.5,
                'feps': 1e-06,
                'quench': 1.0,
                'm': 1.0,
                'n': 1.0,
                'lower': -1,
                'upper': 1,
                'dwell': 50,
                'disp': True
            }

            optimizer_func = anneal
        elif optimizer == 'panneal':
            default_kwargs = {
                'schedule': 'fast',
                'full_output': 1,
                'T0': None,
                'Tf': 1e-12,
                'maxeval': None,
                'maxaccept': None,
                'maxiter': 400,
                'boltzmann': 1.0,
                'learn_rate': 0.5,
                'feps': 1e-06,
                'quench': 1.0,
                'm': 1.0,
                'n': 1.0,
                'lower': -1,
                'upper': 1,
                'dwell': 50,
                'disp': True,
                'cores': 24,
                'interv': 20
            }

            optimizer_func = panneal
        else:
            raise ValueError, "Unrecognized optimizer {}".format(optimizer)

        default_kwargs.update(optimizer_kwargs)
        results = optimizer_func(self.target_func, copy(init_v),
                                 **default_kwargs)

        duration = time.time() - t0
        if results_pickle_prefix is not None:
            pickle_file = "{}_{}.pickle".format(results_pickle_prefix,
                                                optimizer)
            pickle_file = os.path.join(self.log_directory, pickle_file)
            pickle.dump(results, open(pickle_file, "wb"))

        if is_parallelizable == True: set_parallel(False)

        logger.log_status('init target_func: {}'.format(
            self.target_func(init_v)))
        logger.log_status('end target_func: {}'.format(
            results[:]))  #which one?
        logger.log_status('outputs from optimizer: {}'.format(results))
        logger.log_status('Execution time: {}'.format(duration))
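A minimal usage sketch of the calibration entry point above, showing how optimizer_kwargs is merged over the built-in defaults (default_kwargs.update(optimizer_kwargs)); the calibrator object in the commented call is hypothetical.

# Caller-supplied keyword arguments override the lbfgsb defaults shown above.
default_kwargs = {'fprime': None, 'approx_grad': True, 'bounds': None,
                  'factr': 1e12, 'iprint': 1}
optimizer_kwargs = {'factr': 1e7, 'bounds': [(-2.0, 2.0)] * 3}
default_kwargs.update(optimizer_kwargs)
print('factr=%s iprint=%s' % (default_kwargs['factr'], default_kwargs['iprint']))

# A call would then look something like (hypothetical calibrator object):
#   results = calibrator.run(optimizer='lbfgsb',
#                            results_pickle_prefix='calib',
#                            optimizer_kwargs={'factr': 1e7})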
Esempio n. 30
0
            ("neighborhood.ln_price","ln_price"),
            ("paris.household_x_neighborhood.age_lnprice","age_lnprice"),
            ("paris.household_x_neighborhood.lninc_lnprice","lninc_lnprice"),
            ("paris.neighborhood.delta_pop","delta_pop"),
            ("neighborhood.rail9","rail"),
            ("neighborhood.subway","subway"),
            ("neighborhood.disthwy","disthwy"),
            ("neighborhood.tc","tc"),
            ("neighborhood.vp","vp"),
            ("paris.household_x_neighborhood.hhfem_nbtc","hhfem_nbtc")
            )
    }

    from my_estimation_config import my_configuration    
    ss = SimulationState()
    ss.set_current_time(2000)
    ss.set_cache_directory(my_configuration['cache_directory'])

    attribute_cache = AttributeCache()
    sc = SessionConfiguration(new_instance=True,
                         package_order=my_configuration['dataset_pool_configuration'].package_order,
                         in_storage=attribute_cache)


    #settings = ParisSettings()
    #settings.prepare_session_configuration()
    estimator = HLCMEstimator(config=my_configuration,
                              save_estimation_results=False)

    #estimator = HLCMEstimator(settings=my_configuration, 
    #                    run_land_price_model_before_estimation=False, 
             "hhfrench_nbforeign"),
            ("neighborhood.ln_price", "ln_price"),
            ("paris.household_x_neighborhood.age_lnprice", "age_lnprice"),
            ("paris.household_x_neighborhood.lninc_lnprice", "lninc_lnprice"),
            ("paris.neighborhood.delta_pop", "delta_pop"),
            ("neighborhood.rail9", "rail"),
            ("neighborhood.subway", "subway"),
            ("neighborhood.disthwy", "disthwy"),
            ("neighborhood.tc", "tc"),
            ("neighborhood.vp", "vp"),
            ("paris.household_x_neighborhood.hhfem_nbtc", "hhfem_nbtc"))
    }

    from my_estimation_config import my_configuration
    ss = SimulationState()
    ss.set_current_time(2000)
    ss.set_cache_directory(my_configuration['cache_directory'])

    attribute_cache = AttributeCache()
    sc = SessionConfiguration(
        new_instance=True,
        package_order=my_configuration['dataset_pool_configuration'].
        package_order,
        in_storage=attribute_cache)

    #settings = ParisSettings()
    #settings.prepare_session_configuration()
    estimator = HLCMEstimator(config=my_configuration,
                              save_estimation_results=False)

    #estimator = HLCMEstimator(settings=my_configuration,
    def __init__(self, config):
        if 'estimation_database_configuration' in config:
            db_server = DatabaseServer(
                config['estimation_database_configuration'])
            db = db_server.get_database(
                config['estimation_database_configuration'].database_name)

            out_storage = StorageFactory().build_storage_for_dataset(
                type='sql_storage', storage_location=db)
        else:
            out_storage = StorageFactory().get_storage(
                type='flt_storage',
                storage_location=os.path.join(config['cache_directory'],
                                              str(config['base_year'] + 1)))

        simulation_state = SimulationState()
        simulation_state.set_cache_directory(config['cache_directory'])
        simulation_state.set_current_time(config['base_year'])
        attribute_cache = AttributeCache()

        SessionConfiguration(
            new_instance=True,
            package_order=config['dataset_pool_configuration'].package_order,
            in_storage=attribute_cache)

        if not os.path.exists(
                os.path.join(config['cache_directory'], str(
                    config['base_year']))):
            #raise RuntimeError, "datasets uncached; run prepare_estimation_data.py first"
            CacheScenarioDatabase().run(config, unroll_gridcells=False)

        for dataset_name in config['datasets_to_preload']:
            SessionConfiguration().get_dataset_from_pool(dataset_name)

        households = SessionConfiguration().get_dataset_from_pool("household")
        household_ids = households.get_id_attribute()
        workers = households.get_attribute("workers")

        hh_ids = []
        member_ids = []
        is_worker = []
        job_ids = []

        for i in range(households.size()):
            if workers[i] > 0:
                hh_ids += [household_ids[i]] * workers[i]
                member_ids += range(1, workers[i] + 1)
                is_worker += [1] * workers[i]
                job_ids += [-1] * workers[i]

        in_storage = StorageFactory().get_storage('dict_storage')

        persons_table_name = 'persons'
        in_storage.write_table(
            table_name=persons_table_name,
            table_data={
                'person_id': arange(len(hh_ids)) + 1,
                'household_id': array(hh_ids),
                'member_id': array(member_ids),
                'is_worker': array(is_worker),
                'job_id': array(job_ids),
            },
        )

        persons = PersonDataset(in_storage=in_storage,
                                in_table_name=persons_table_name)
        persons.write_dataset(out_storage=out_storage,
                              out_table_name=persons_table_name)
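The loop above expands each household row into one person row per worker, numbering member_id from 1 within the household and starting every worker with job_id = -1 (unassigned). A self-contained sketch with toy data:

from numpy import arange, array

household_ids = array([101, 102, 103])
workers       = array([2, 0, 1])   # toy worker counts per household

hh_ids, member_ids, is_worker, job_ids = [], [], [], []
for i in range(household_ids.size):
    if workers[i] > 0:
        hh_ids     += [household_ids[i]] * workers[i]
        member_ids += list(range(1, workers[i] + 1))
        is_worker  += [1] * workers[i]
        job_ids    += [-1] * workers[i]

print(arange(len(hh_ids)) + 1)   # person_id:     [1 2 3]
print(array(hh_ids))             # household_id:  [101 101 103]
print(array(member_ids))         # member_id:     [1 2 1]
print(array(job_ids))            # job_id:        [-1 -1 -1]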
Esempio n. 33
0
class MoreDatasetTests(opus_unittest.OpusTestCase):
    def setUp(self):
        self.start_year = 2001
        self.expected_sic_data = array([6,4,7,808,6])
        self.job_id = array([1,2,3,4,5])
        self.base_cache_dir = tempfile.mkdtemp(prefix='opus_tmp_test_dataset')
        self.simulation_state = SimulationState(low_memory_run=True, new_instance=True, base_cache_dir=self.base_cache_dir)
        self.dir = self.simulation_state.get_cache_directory()
        self.simulation_state.set_current_time(self.start_year)
        
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)

        self.in_storage = StorageFactory().get_storage('dict_storage')
        self.in_storage.write_table(
            table_name='jobs',
            table_data={
                'grid_id':array([10,20,30,40,50]),
                'job_id':self.job_id,
                },
            )
            
        self.out_storage = StorageFactory().get_storage('dict_storage')
    
        self.job_set_resources = ResourceFactory().get_resources_for_dataset(
            'job', 
            in_storage = self.in_storage, 
            out_storage = self.out_storage,
            in_table_name_pair = ('jobs',None),
            out_table_name_pair = ('jobs_exported',None),
            attributes_pair = (None,AttributeType.PRIMARY),
            id_name_pair = ('job_id','job_id'), 
            nchunks_pair = (1,1), 
            debug_pair = (1,None)
            )
            
    def tearDown(self):
        if os.path.exists(self.base_cache_dir):
            rmtree(self.base_cache_dir)
        
    def test_err_when_asking_for_attribute_that_is_not_in_cache(self):
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
        job_set.flush_dataset()
        job_set.get_attribute('job_id')
        self.assertRaises(NameError, job_set.get_attribute, 'attribute_that_does_not_exist')
            
    def test_compute_one_variable_when_asking_for_attribute_that_is_not_in_cache(self):
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
        job_set.flush_dataset()
        job_id_variable_name = VariableName('opus_core.jobs.attribute_that_does_not_exist')
        
        logger.enable_hidden_error_and_warning_words()
        try:
            self.assertRaises(StandardError, job_set._compute_one_variable, job_id_variable_name)
            
        finally:
            logger.disable_hidden_error_and_warning_words()
            
    def test_flush_dataset_correct_flags(self):
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        self.assert_(not 'job_id' in job_set.attribute_boxes)
        
        job_set.get_attribute("job_id")
        self.assert_(job_set.attribute_boxes["job_id"].is_in_memory())
        self.assert_(not job_set.attribute_boxes["job_id"].is_cached())
        
        job_set.flush_dataset()
        self.assert_(not job_set.attribute_boxes["job_id"].is_in_memory())
        self.assert_(job_set.attribute_boxes["job_id"].is_cached())
        
        job_set.get_attribute("job_id")
        self.assert_(job_set.attribute_boxes["job_id"].is_in_memory())
        self.assert_(job_set.attribute_boxes["job_id"].is_cached())
        
    def test_flush_dataset_correct_data(self):
        job_set = Dataset(self.job_set_resources, dataset_name="jobs")
        job_set.add_attribute(self.job_id, "job_id", metadata=AttributeType.PRIMARY)
        job_set.add_attribute(self.expected_sic_data, "sic", metadata=AttributeType.COMPUTED)
        job_set.flush_dataset()
        returned_sic_data = job_set.get_attribute("sic")
        returned_id_data = job_set.get_attribute("job_id")
        self.assert_(ma.allequal(returned_id_data,self.job_id))
        self.assert_(ma.allequal(returned_sic_data,self.expected_sic_data))
Esempio n. 34
0
    refinements = None
    refinements_storage = None
    if options.refinements_directory is not None:
        refinements_storage = StorageFactory().get_storage(
            'flt_storage', storage_location=options.refinements_directory)
        refinements = DatasetFactory().search_for_dataset(
            'refinement',
            package_order,
            arguments={'in_storage': refinements_storage})
        years = refinements.get_attribute('year')
        if start_year is None: start_year = years.min()
        if end_year is None: end_year = years.max()

    simulation_state = SimulationState()
    simulation_state.set_cache_directory(options.cache_directory)
    simulation_state.set_current_time(start_year)
    attribute_cache = AttributeCache()
    dataset_pool = SessionConfiguration(
        new_instance=True,
        package_order=package_order,
        in_storage=attribute_cache).get_dataset_pool()

    if refinements is None:
        refinements = dataset_pool.get_dataset('refinement')
        years = refinements.get_attribute('year')
        if start_year is None: start_year = years.min()
        if end_year is None: end_year = years.max()

    for year in range(start_year, end_year + 1):
        logger.start_block("Doing refinement for %s" % year)
        simulation_state.set_current_time(year)
Esempio n. 35
0
  def run(my, cache_dir=None, year=None):
    global parcel_set, z, node_set, submarket, esubmarket, isr, parcelfees, costdiscount

    '''
    if 0:
        z = Zoning()
        p = Parcels()
        cPickle.dump((z,p),open('databaseinfo.jar','w'))
    else:
        print "Reading db info from jar..."
        z,p = cPickle.load(open(os.path.join(os.environ['OPUS_DATA'],'bay_area_parcel/databaseinfo.jar')))
    '''

    ## when developer_model is invoked alone from command line
    if cache_dir is not None and year is not None:
        # debugging overrides kept from the original source, left commented out so
        # that the cache_dir and year passed in from the command line are used:
        #data_path = paths.get_opus_data_path_path()
        #cache_dir = os.path.join(data_path, 'bay_area_parcel/runs/run_105.2012_05_03_09')
        #year = 2011
        simulation_state = SimulationState()
        simulation_state.set_current_time(year)
        SimulationState().set_cache_directory(cache_dir)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(new_instance=True,
                             package_order=['bayarea', 'urbansim_parcel',
                                            'urbansim', 'opus_core'],
                             in_storage=attribute_cache
                            ).get_dataset_pool()
        
    dataset_pool = SessionConfiguration().get_dataset_pool()
    current_year = SimulationState().get_current_time()
    cache_dir = SimulationState().get_cache_directory()

    parcel_set = dataset_pool.get_dataset('parcel')
    building_set = dataset_pool.get_dataset('building')
    household_set = dataset_pool.get_dataset('household')
    node_set = dataset_pool.get_dataset('node')
    unit_set = dataset_pool.get_dataset('residential_unit')
    submarket = dataset_pool.get_dataset('submarket')
    esubmarket = dataset_pool.get_dataset('employment_submarket')
    #print numpy.array(unit_set['rent'] > 0).size
    #for i in range(unit_set.size()):
    #    print unit_set['unit_price'][i], unit_set['unit_sqft'][i]
    
    #transit_set = dataset_pool.get_dataset('transit_station')
    #print dataset_pool.datasets_in_pool()
    '''
    from bayarea.node import transit_type_DDD_within_DDD_meters
    for i in range(7):
        print i
        v = transit_type_DDD_within_DDD_meters.transit_type_DDD_within_DDD_meters(i,500)
        d = v.compute(dataset_pool)
        print d.size
        found = d[numpy.nonzero(d)]
        print found.size
    sys.exit()
    '''
   
    compute_devmdl_accvars(node_set) 

    ######################
    ### CAREFUL - THIS IS WHERE SCENARIO SPECIFIC INFO GOES
    ######################

    current_year = SimulationState().get_current_time()
    z = Zoning(my.scenario,current_year)
    isr = None
    if my.scenario.startswith('Transit'): isr = ISR()
    parcelfees = None
    if my.scenario.startswith('Preferred'):
        parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_preferred'))
    #elif my.scenario.startswith('Transit'):
    #    parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_transit'))
    elif my.scenario.startswith('Equity'):
        parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_equity'))
    elif my.scenario.startswith('Infill'):
        parcelfees = ParcelFees(dataset_pool.get_dataset('parcelfees_infill'))
    costdiscount = 0.0
    if not my.scenario.startswith('No Project') and not my.scenario.startswith('Equity'):
        costdiscount = .01

    #################################
    #################################
    from numpy import logical_not
    empty_parcels = parcel_set.compute_variables("(parcel.number_of_agents(building)==0)*(parcel.node_id>0)*(parcel.shape_area>80)")
    res_parcels = parcel_set.compute_variables("(parcel.number_of_agents(building)>0)*(parcel.node_id>0)*(parcel.shape_area>80)")
    bart_parcels = parcel_set.compute_variables("(parcel.disaggregate(bayarea.node.transit_type_1_within_800_meters))")
    caltrain_parcels = parcel_set.compute_variables("(parcel.disaggregate(bayarea.node.transit_type_2_within_800_meters))")
    #pda_parcels = parcel_set.compute_variables("(parcel.pda_id > -1)*(numpy.logical_not(parcel.county_id==38))")
    pda_parcels = parcel_set.compute_variables("(parcel.pda_id > -1)")
    SAMPLE_RATE = 0.01
    from opus_core.sampling_toolbox import sample_noreplace
    from numpy import concatenate, where
    sampled_res_parcels_index = sample_noreplace(where(res_parcels)[0], int(SAMPLE_RATE * parcel_set.size()))
    test_parcels = concatenate((where(empty_parcels==1)[0], sampled_res_parcels_index,where(bart_parcels==1)[0],where(caltrain_parcels==1)[0],where(pda_parcels==1)[0]))
    test_parcels = sample_noreplace(test_parcels, int(.08 * 154877))
    numpy.random.shuffle(test_parcels)
 
    """
    sample = []
    for i in range(parcel_set.size()):
        if empty_parcels[i] == 1:
            sample.append(i+1)
        elif res_parcels[i] == 1 and numpy.random.ranf() < SAMPLE_RATE:
            sample.append(i+1)
    test_parcels = array(sample)
    """

    #empty_parcels = parcel_set.compute_variables("(parcel.node_id>0)*(parcel.shape_area>80)")
    #test_parcels = numpy.where(empty_parcels==1)[0]
    
    global building_sqft, building_price
    building_sqft = parcel_set.compute_variables('parcel.aggregate(building.building_sqft)')

    building_price_owner_residential=parcel_set.compute_variables('building_price_owner_res=parcel.aggregate((residential_unit.sale_price)*(residential_unit.sale_price>0),intermediates=[building])')
    building_price_rental_residential=parcel_set.compute_variables('building_price_rental_res=parcel.aggregate((residential_unit.rent*12*17.9)*(residential_unit.rent>0),intermediates=[building])')
    building_price_nonresidential = parcel_set.compute_variables('building_price_nonres = parcel.aggregate((building.non_residential_rent*7*building.non_residential_sqft))')
    sum_building_p = parcel_set.compute_variables('sum_building_price = parcel.building_price_owner_res + parcel.building_price_rental_res + building_price_nonres')
    ##sum_building_price = building_price_owner_residential + building_price_rental_residential + building_price_nonresidential
    vacant_parcel = parcel_set.compute_variables('parcel.sum_building_price == 0')
    price_per_sqft_land = (parcel_set.compute_variables('parcel.disaggregate(safe_array_divide(zone.aggregate(parcel.sum_building_price),zone.aggregate(building.building_sqft)))'))/4
    parcel_land_area = parcel_set.compute_variables('parcel.shape_area')
    vacant_land_price = vacant_parcel*price_per_sqft_land*parcel_land_area
    building_price = sum_building_p + vacant_land_price


    ##sum_building_price = building_price_owner_residential + building_price_rental_residential + building_price_nonresidential

    #land_price = (sum_building_p==0) * (parcel_set.compute_variables('parcel.disaggregate(safe_array_divide(zone.aggregate(parcel.sum_building_price),zone.aggregate(building.building_sqft)))'))* parcel_set.compute_variables('parcel.land_area')

    #info used to match from proposal_component to submarket
    parcel_set.compute_variables(["bayarea.parcel.within_half_mile_transit", 
                                  "bayarea.parcel.schooldistrict",
                                  "bayarea.parcel.jurisdiction_id",
                                 ])
    #test_parcels = array([i+1 for i in range(parcel_set.size())])
    #test_parcels = test_parcels[:10000]

    #test_parcels = test_parcels[:150]
    #test_parcels = numpy.where(parcel_set['parcel_id'] == 1608920)[0]
    #print test_parcels
    logger.log_status("%s parcels to test" % (test_parcels.size))
    print "Num of parcels:", test_parcels.size
    import time

    HOTSHOT = 0
    if MP:
        from multiprocessing import Pool, Queue
        pool = Pool(processes=4)

    import hotshot, hotshot.stats#, test.pystone
    if HOTSHOT:
        prof = hotshot.Profile('devmdl.prof')
        prof.start()

    outf = open(os.path.join(cache_dir,'buildings-%d.csv' % current_year),'w')
    outf.write('pid,county,dev_btype,stories,sqft,res_sqft,nonres_sqft,tenure,year_built,res_units,npv,actualfee,btype\n')
    debugf = open(os.path.join(cache_dir,'proforma-debug-%d.csv' % current_year),'w')
    bformdbg = 'county_id,far,height,max_dua,bform.sf_builtarea(),bform.sfunitsizes,bform.mf_builtarea(),bform.mfunitsizes,bform.num_units,bform.nonres_sqft,bform.buildable_area'
    otherdbg = 'isr,parcelfees,existing_sqft,existing_price,lotsize,unitsize,unitsize2,bform.sales_absorption,bform.rent_absorption,bform.leases_absorption,bform.sales_vacancy_rates,bform.vacancy_rates'
    debugf.write('pid,btype,npv,actualfee,pricesf,pricemf,rentsf,rentmf,rentof,rentret,rentind,%s,%s\n' % (bformdbg,otherdbg))
    t1 = time.time()
    aggd = {}

    def chunks(l, n):
        for i in xrange(0, len(l), n):
            yield l[i:i+n]

    for test_chunk in chunks(test_parcels,1000):

        print "Executing CHUNK"

        sales_absorption = submarket.compute_variables('bayarea.submarket.sales_absorption')
        rent_absorption = submarket.compute_variables('bayarea.submarket.rent_absorption')
        vacancy_rates = submarket.compute_variables('bayarea.submarket.vacancy_rates')
        leases_absorption = esubmarket.compute_variables('bayarea.employment_submarket.leases_absorption')
        nr_vacancy_rates = esubmarket.compute_variables('bayarea.employment_submarket.vacancy_rates')

        if HOTSHOT:
            results = []
            for p in test_chunk: 
                r = process_parcel(p)
                if r <> None and r <> -1: results.append(list(r))
        else:
            if MP:
                results = pool.map(process_parcel,test_chunk)
            else:
                results = [process_parcel(p) for p in test_chunk]
            results_bldg = [list(x[0]) for x in results if x <> None and x[0] <> -1]
            #each row of units represents number of units of [1, 2, 3, 4] bedrooms
            units = array([x[1][0] for x in results if x <> None and x[0] <> -1])
            sqft_per_unit = array([x[1][1] for x in results if x <> None and x[0] <> -1])
            for x in results:
                if x <> None: 
                    debugf.write(x[2])

            results = results_bldg
        for result in results:
            #print result
            out_btype = devmdltypes[int(result[2])-1]
            outf.write(string.join([str(x) for x in result]+[str(out_btype)],sep=',')+'\n')

        ##TODO: id of buildings to be demolished
    
        buildings_to_demolish = []
        idx_buildings_to_demolish = building_set.get_id_index(buildings_to_demolish)
        
        JAMM = JoinAttributeModificationModel()
        JAMM.run(household_set, building_set, index=idx_buildings_to_demolish, value=-1)

        building_set.remove_elements(idx_buildings_to_demolish)
        column_names = ["parcel_id","county","building_type_id","stories",
                    "building_sqft","residential_sqft","non_residential_sqft",
                    "tenure","year_built","residential_units"]
        buildings_data = copy.deepcopy(results)
        for i in range(len(buildings_data)):
            buildings_data[i][2] = devmdltypes[int(buildings_data[i][2])-1]
        buildings_data = array(buildings_data)
        new_buildings = {}
        available_bldg_id = building_set['building_id'].max() + 1
        new_bldg_ids = arange(available_bldg_id, available_bldg_id+buildings_data.shape[0],
                              dtype=building_set['building_id'].dtype)
        if buildings_data.size > 0:
            for icol, col_name in enumerate(column_names):
                if col_name in building_set.get_known_attribute_names():
                    ddtype = building_set[col_name].dtype
                    new_buildings[col_name] = (buildings_data[:, icol]).astype(ddtype)
                else:
                    #if the col_name is not in dataset, it will be discarded anyway
                    pass

            new_buildings['building_id'] = new_bldg_ids
            # recode tenure: 1 - rent, 2 - own from 0 - own, 1 - rent
            new_buildings['tenure'][new_buildings['tenure']==0] = 2
            ## pid is the index to parcel_set; convert them to actual parcel_id
            #new_buildings['parcel_id'] = parcel_set['parcel_id'][new_buildings['parcel_id']]
            building_set.add_elements(new_buildings, require_all_attributes=False,
                                      change_ids_if_not_unique=True)
            building_set.flush_dataset()

            assert new_bldg_ids.size == units.shape[0] == sqft_per_unit.shape[0]
            units_bldg_ids = repeat(new_bldg_ids, 4)
            bedrooms = array([1, 2, 3, 4] * units.size)
            units = round(units.ravel())
            sqft_per_unit = sqft_per_unit.ravel()
            new_units = {'building_id': array([], dtype='i4'),
                         'bedrooms': array([], dtype='i4'),
                         'sqft_per_unit': array([], dtype='i4')
                        }
            
            for i_unit, unit in enumerate(units):
                if unit <= 0:
                  continue
                new_units['building_id'] = concatenate((new_units['building_id'],
                                                        repeat(units_bldg_ids[i_unit], unit))
                                                       )
                new_units['bedrooms'] = concatenate((new_units['bedrooms'],
                                                     repeat(bedrooms[i_unit], unit))
                                                    )
                new_units['sqft_per_unit'] = concatenate((new_units['sqft_per_unit'],
                                                          repeat(sqft_per_unit[i_unit], unit))
                                                         )

            ##force dtype conversion to the same dtype as unit_set
            for col_name in ['building_id', 'bedrooms', 'sqft_per_unit']:
                if col_name in unit_set.get_known_attribute_names():
                    new_units[col_name] = new_units[col_name].astype(unit_set[col_name].dtype)

            unit_set.add_elements(new_units, require_all_attributes=False,
                                  change_ids_if_not_unique=True)
            unit_set.flush_dataset()

        for result in results:
            units = result[-1]
            nonres_sqft = 1 #result[6]/1000.0
            county = result[1]
            btype = result[2]
            key = (county,btype)
            aggd.setdefault(key,0)
            if btype < 7: aggd[key] += units
            else: aggd[key] += nonres_sqft
            aggd.setdefault(county,0)
            aggd[county] += units
   
    aggf = open('county_aggregations-%d.csv' % current_year,'w')
    county_names = {49:'son',41:'smt',1:'ala',43:'scl',28:'nap',38:'sfr',7:'cnc',48:'sol',21:'mar',0:'n/a'}
    btype_names = {1:'SF',2:'SFBUILD',3:'MF',4:'MXMF',5:'CONDO',6:'MXC',7:'OF',8:'MXO',9:'CHOOD',10:'CAUTO',11:'CBOX',12:'MANU',13:'WHE'}
    aggf.write('county,total,'+string.join(btype_names.values(),sep=',')+'\n')
    for county in [38,41,43,1,7,48,28,49,21]:
        aggf.write(county_names[county]+','+str(aggd.get(county,0)))
        for btype in btype_names.keys():
            key = (county,btype)
            val = aggd.get(key,0) 
            aggf.write(','+str(val))
        aggf.write('\n')

    t2 = time.time()

    print "Finished in %f seconds" % (t2-t1)
    print "Ran optimization %d times" % devmdl_optimize.OBJCNT
    global NOZONINGCNT, NOBUILDTYPES
    print "Did not find zoning for parcel %d times" % NOZONINGCNT
    print "Did not find building types for parcel %d times" % NOBUILDTYPES
    print "DONE"

    my.post_run() #remove price_shifter & cost_shifter to avoid them being cached

    if HOTSHOT:
        prof.stop()
        prof.close()
        stats = hotshot.stats.load('devmdl.prof')
        stats.strip_dirs()
        stats.sort_stats('cumulative')
        stats.print_stats(20)
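The parcel loop above walks the sampled parcels in batches of 1000 through a small chunks() generator. A standalone sketch of that batching (written with range so it runs under Python 2 or 3):

def chunks(l, n):
    # yield successive slices of length n (the last one may be shorter)
    for i in range(0, len(l), n):
        yield l[i:i + n]

test_parcels = list(range(10))                       # toy stand-in for the sampled parcel indices
print([len(c) for c in chunks(test_parcels, 4)])     # [4, 4, 2]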
Esempio n. 36
0
    def run(self, config, year, storage_type='sql'):
        """ 
        """
        
        tm_config = config['travel_model_configuration']
        database_server_config = tm_config.get("database_server_configuration", 'simtravel_database_server')
        database_name = tm_config.get("database_name", 'mag_zone_baseyear')
        
        cache_directory = config['cache_directory']
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(cache_directory)
        simulation_state.set_current_time(year)
        attribute_cache = AttributeCache()
        dataset_pool = SessionConfiguration(new_instance=True,
                                            package_order=config['dataset_pool_configuration'].package_order,
                                            in_storage=attribute_cache).get_dataset_pool()

        if storage_type == 'sql':
            db_server = DatabaseServer(DatabaseConfiguration(
                                                             database_name = database_name,
                                                             database_configuration = database_server_config
                                                             )
                                                             )
            if not db_server.has_database(database_name): 
                print "Db doesn't exist creating one"
                db_server.create_database(database_name)
            db = db_server.get_database(database_name) 
            output_storage = sql_storage(storage_location = db)
        elif storage_type == 'csv':
            csv_directory = os.path.join(cache_directory, 'csv', str(year))
            output_storage = csv_storage(storage_location=csv_directory)
        else:
            raise ValueError, "Unsupported output storage type {}".format(storage_type)
                                                            
        logger.start_block('Compute and export data to openAMOS...')

        hh = dataset_pool.get_dataset('household')
        hh_recs = dataset_pool.get_dataset('households_recs')
        #hh_recs.add_attribute(0,"htaz1")
        #hh_recs.flush_dataset()
        #syn_hh = dataset_pool.get_dataset('synthetic_household')

        hh_variables = ['houseid=household.household_id',
                        "hhsize=household.number_of_agents(person)",
                        "one=(household.household_id>0).astype('i')",
                        "inclt35k=(household.income<35000).astype('i')",
                        "incge35k=(household.income>=35000).astype('i')",
                        "incge50k=(household.income>=50000).astype('i')",
                        "incge75k=(household.income>=75000).astype('i')",
                        "incge100k=(household.income>=100000).astype('i')",
                        "inc35t50=((household.income>=35000) & (household.income<50000)).astype('i')",
                        "inc50t75=((household.income>=50000) & (household.income<75000)).astype('i')",
                        "inc75t100=((household.income>=75000) & (household.income<100000)).astype('i')",
                        'htaz = ((houseid>0)*(household.disaggregate(building.zone_id)-100) + (houseid<=0)*0)',
                        #'htaz = ((houseid>0) & (htaz1>100))*(htaz1-100)+((houseid>0) & (htaz1==-1))*1122',
                        "withchild = (household.aggregate(person.age<18)>0).astype('i')",
                        "noc = household.aggregate(person.age<18)",
                        "numadlt = household.aggregate(person.age>=18)",
                        "hinc=household.income",
                        "wif=household.workers",
                        #"wif=household.aggregate(mag_zone.person.is_employed)",
                        'numwrkr=household.workers',
                        #'numwrkr=household.aggregate(mag_zone.person.is_employed)',
                        'nwrkcnt=household.number_of_agents(person) - household.workers',
                        #'nwrkcnt=household.number_of_agents(person) - household.aggregate(mag_zone.person.is_employed)',

                        'yrbuilt=mag_zone.household.yrbuilt',
                        'mag_zone.household.sparent',
                        'mag_zone.household.rur',
                        'mag_zone.household.urb',
                        'zonetid = household.disaggregate(building.zone_id)',
                        ]
        
        self.prepare_attributes(hh, hh_variables)
        attrs_to_export = hh_recs.get_known_attribute_names()
       
        self.write_dataset(hh, attrs_to_export, output_storage)
        dataset_pool._remove_dataset(hh.dataset_name)

        persons = dataset_pool.get_dataset('person')
        persons.out_table_name_default = 'persons'

        # Recoding invalid work and school locations to some random valid values
        persons_recs = dataset_pool.get_dataset('persons_recs')
        persons_recs.add_attribute(persons['person_id'],"personuniqueid")
        persons_recs.add_attribute(persons['marriage_status'],"marstat")
        persons_recs.add_attribute(persons['student_status'],"schstat")


        """
        persons_recs.add_attribute(persons['wtaz0'],"htaz_act")
        persons_recs.add_attribute(0,"wtaz_rec")
        persons_recs.add_attribute(0,"wtaz_rec1")
        persons_recs.add_attribute(0,"wtaz_rec2")

        persons_recs.add_attribute(0,"wtaz1_1")
        persons_recs.add_attribute(0,"wtaz1_2")
        persons_recs.add_attribute(0,"wtaz1_3")
        #persons_recs.add_attribute(persons['student_status'],"schstat")
        """

        persons_recs.add_attribute(0,"wtaz1")
        persons_recs.add_attribute(0,"htaz")
        persons_recs.add_attribute(0,"schtaz1")

        persons_recs.flush_dataset()

        #syn_persons = dataset_pool.get_dataset('synthetic_person')
        persons_variables = ['personid=mag_zone.person.member_id',
                             'personuniqueid=person.person_id',
                             'houseid=person.household_id',
                             "one=(person.person_id>0).astype('i')",
                             'trvtime=mag_zone.person.travel_time_from_home_to_work',
                             'timetowk=mag_zone.person.travel_time_from_home_to_work',
                             #'mag_zone.person.tmtowrk',
                             #'tmtowrk=person.disaggregate(synthetic_person.tmtowrk)',
                             "ag5t10=((person.age>=5) & (person.age<=10)).astype('i')",
                             "ag11t14=((person.age>=11) & (person.age<=14)).astype('i')",
                             "ag15t17=((person.age>=15) & (person.age<=17)).astype('i')",
                             "ag18t24=((person.age>=18) & (person.age<=24)).astype('i')",
                             "ag25t34=((person.age>=25) & (person.age<=34)).astype('i')",
                             "ag35t44=((person.age>=35) & (person.age<=44)).astype('i')",
                             "ag45t54=((person.age>=45) & (person.age<=54)).astype('i')",
                             "ag55t64=((person.age>=55) & (person.age<=64)).astype('i')",
                             "agge65=(person.age>=65).astype('i')",

                             "ag12t17=((person.age>=12) & (person.age<=17)).astype('i')",
                             "ag5t14=((person.age>=5) & (person.age<=14)).astype('i')",
                             "agge15=(person.age>=15).astype('i')",

                             "wrkr=(person.employment_status==1).astype('i')",
                             "isemploy=(person.employment_status==1).astype('i')",
                             "fulltim=(mag_zone.person.full_time==1).astype('i')",
                             'parttim=mag_zone.person.part_time',

                             'htaz = ((houseid>0)*(person.disaggregate(building.zone_id, intermediates=[household])-100) + (houseid<=0)*0)',

                             'wtaz1=(person.wtaz <= 0)*0 + (person.wtaz > 0)*(person.wtaz-100)',
                       
                             "presch = ((person.age < 5)&(houseid>0)).astype('i')",
                             "mag_zone.person.schstat",


                             'schtaz1 = (person.schtaz <= 0)*0 + (person.schtaz > 0)*(person.schtaz-100)',
                             'marstat = person.marriage_status',

                             'enroll = person.student_status',
                             'grade = person.student_status & person.education',
                             'educ = person.education',
                             "male = (person.sex==1).astype('i')",
                             "female = (person.sex==2).astype('i')",

                             "coled = (person.education >= 10).astype('i')",

                             'race1 = person.race',
                             "white = (person.race == 1).astype('i')",
                             'person.hispanic'
                             ]
        self.prepare_attributes(persons, persons_variables)

        attrs_to_export = persons_recs.get_known_attribute_names()

        self.write_dataset(persons, attrs_to_export, output_storage)
        dataset_pool._remove_dataset(persons.dataset_name)

        zones = dataset_pool.get_dataset('zone')
        zones_variables = [
                             "retail_employment=zone.aggregate(mag_zone.job.sector_group=='retail')",
                             "public_employment=zone.aggregate(mag_zone.job.sector_group=='public')",
                             "office_employment=zone.aggregate(mag_zone.job.sector_group=='office')",
                             "industrial_employment=zone.aggregate(mag_zone.job.sector_group=='individual')",
                             "other_employment=zone.aggregate(mag_zone.job.sector_group=='other')",

                             "retail_employment_density=zone.aggregate(mag_zone.job.sector_group=='retail')/zone.acres",
                             "public_employment_density=zone.aggregate(mag_zone.job.sector_group=='public')/zone.acres",
                             "office_employment_density=zone.aggregate(mag_zone.job.sector_group=='office')/zone.acres",
                             "industrial_employment_density=zone.aggregate(mag_zone.job.sector_group=='individual')/zone.acres",
                             "other_employment_density=zone.aggregate(mag_zone.job.sector_group=='other')/zone.acres",

                             "total_area=zone.acres",

                             "lowest_income=zone.aggregate(household.income < scoreatpercentile(household.income, 20))",
                             "low_income=zone.aggregate(household.income < scoreatpercentile(household.income, 40))",
                             "high_income=zone.aggregate(household.income > scoreatpercentile(household.income, 80))",

                             #"institutional_population=zone.disaggregate(locations.institutional_population)",
                             #"groupquarter_households=zone.disaggregate(locations.groupquarter_households)",

                             "residential_households=zone.number_of_agents(household)",

                             "locationid=zone.zone_id",
                             ]
        
        locations = dataset_pool['locations']
        self.prepare_attributes(zones, zones_variables, dataset2=locations)
        attrs_to_export = locations.get_known_attribute_names()

        self.write_dataset(locations, attrs_to_export, output_storage)
        dataset_pool._remove_dataset(locations.dataset_name)
        #raw_input("check location block")

        logger.end_block()
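The household and person variable lists above are opus expression strings: each entry defines a new column as a vectorized transform of existing attributes. A hedged numpy sketch of what an income-band indicator such as "inclt35k=(household.income<35000).astype('i')" evaluates to; the incomes are toy values, the real ones come from the attribute cache.

from numpy import array

income = array([18000, 42000, 75000, 120000])        # toy household incomes

inclt35k  = (income < 35000).astype('i')
inc35t50  = ((income >= 35000) & (income < 50000)).astype('i')
incge100k = (income >= 100000).astype('i')

print(inclt35k)     # [1 0 0 0]
print(inc35t50)     # [0 1 0 0]
print(incge100k)    # [0 0 0 1]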
Esempio n. 37
0
class ModelSystem(object):
    """
    Uses the information in configuration to run/estimate a set of models for given set of years.
    """

    def __init__(self):
        self.running = False
        self.forked_processes = []
        self.running_conditional = threading.Condition()

    def run(
        self,
        resources,
        write_datasets_to_cache_at_end_of_year=True,
        log_file_name="run_model_system.log",
        cleanup_datasets=True,
    ):
        """Entries in resources: (entries with no defaults are required)
               models - a list containing names of models to be run. Each name
               must correspond to the name of the module/class of that model. Default: None
               years - a tuple (start year, end year)
               debuglevel - an integer. The higher the more output will be printed. Default: 0
               expression_library - a dictionary.  The keys in the dictionary are pairs (dataset_name, variable_name)
               and the values are the corresponding expressions.  The model system needs to set the expression library
               (if it isn't None) in DatasetFactory for DatasetFactory to know about variables defined as expressions
               in the xml expression library.  Default: None
        This method is called both to start up the simulation for all years, and also for each year
        when running with one process per year.  In the latter case, 'years' consists of just
        (current_year, current_year) rather than the real start and end years for the simulation.
        """
        if not isinstance(resources, Resources):
            raise TypeError, "Argument 'resources' must be of type 'Resources'."
        logger_settings = resources.get("log", {"tags": [], "verbosity_level": 3})
        logger.set_tags(logger_settings.get("tags", []))
        logger.set_verbosity_level(logger_settings.get("verbosity_level", 3))
        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        self.run_year_namespace = {}

        if resources.get("cache_directory", None) is not None:
            self.simulation_state.set_cache_directory(resources["cache_directory"])

        if "expression_library" in resources:
            VariableFactory().set_expression_library(resources["expression_library"])

        if resources.get("sample_input", False):
            self.update_config_for_multiple_runs(resources)

        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)

            with logger.block("Start simulation run"):
                models = resources.get("models", [])
                models_in_years = resources.get("models_in_year", {})

                resources.check_obligatory_keys(["years"])

                years = resources["years"]
                if (not isinstance(years, tuple)) and (not isinstance(years, list)):
                    raise TypeError, "Entry 'years' in resources must be a tuple."

                if len(years) < 2:
                    print years
                    raise StandardError, "Entry 'years' in resources must be of length at least 2."

                start_year = years[0]
                end_year = years[-1]

                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get("seed", NO_SEED)

                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)

                for year in range(start_year, end_year + 1):
                    with logger.block("Starting simulation for year " + str(year)):
                        self.simulation_state.set_current_time(year)
                        SessionConfiguration().get_dataset_pool().remove_all_datasets()
                        logger.disable_file_logging(log_file)
                        try:
                            if models_in_years.get(year, None) is not None:
                                models_to_run = models_in_years[year]
                            else:
                                models_to_run = models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets,
                            )
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        collect()

        finally:
            logger.disable_file_logging(log_file)

    def flush_datasets(self, dataset_names, after_model=False):
        dataset_pool = SessionConfiguration().get_dataset_pool()
        for dataset_name in dataset_names:
            if dataset_pool.has_dataset(dataset_name):
                self.flush_dataset(dataset_pool.get_dataset(dataset_name), after_model=after_model)

    def flush_dataset(self, dataset, after_model=False):
        """Write the PRIMARY attributes of this dataset to the cache."""
        if dataset and isinstance(dataset, Dataset):
            # Do not flush after model if not necessary
            if after_model:
                if len(dataset.get_attribute_names()) <= len(dataset.get_id_name()):
                    return
                if (len(dataset.get_attribute_names()) == len(dataset.get_known_attribute_names())) and (
                    len(dataset.get_attributes_in_memory()) <= len(dataset.get_id_name())
                ):
                    dataset.delete_computed_attributes()
                    return
            dataset.delete_computed_attributes()
            dataset.load_and_flush_dataset()

    def flush_datasets_after_model(self, resources):
        if resources.get("flush_variables", False):
            AttributeCache().delete_computed_tables()
            # this will also delete computed attributes
            datasets_to_cache = SessionConfiguration().get_dataset_pool().datasets_in_pool().keys()
        else:
            datasets_to_cache = resources.get("datasets_to_cache_after_each_model", [])
        self.flush_datasets(datasets_to_cache, after_model=True)

    def _run_year(
        self,
        year,
        models,
        simulation_state,
        debuglevel,
        resources,
        write_datasets_to_cache_at_end_of_year,
        cleanup_datasets=True,
    ):
        """
        Assumes that all datasets resides in the cache directory in binary format.
        """
        try:
            import wingdbstub
        except:
            pass
        self.vardict = {}
        log_file_name = os.path.join(simulation_state.get_cache_directory(), "year_%s_log.txt" % year)
        logger.enable_file_logging(log_file_name, "w")
        try:
            logger.start_block("Simulate year %s" % year)
            try:
                base_year = resources["base_year"]
                if year == base_year:
                    year_for_base_year_cache = year  # case of estimation
                else:
                    year_for_base_year_cache = year - 1
                cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
                self.vardict["cache_storage"] = cache_storage
                base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year)
                self.vardict["base_cache_storage"] = base_cache_storage
                simulation_state.set_flush_datasets(resources.get("flush_variables", False))
                SessionConfiguration()["simulation_year"] = year
                SessionConfiguration()["debuglevel"] = debuglevel
                datasets_to_preload_in_year = resources.get("datasets_to_preload_in_year", {})
                if datasets_to_preload_in_year.get(year, None) is not None:
                    datasets_to_preload = datasets_to_preload_in_year[year]
                else:
                    datasets_to_preload = resources.get("datasets_to_preload", {})
                for dataset_name in datasets_to_preload:
                    SessionConfiguration().get_dataset_from_pool(dataset_name)
                models_configuration = resources.get("models_configuration", {})
                dataset_pool = SessionConfiguration().get_dataset_pool()
                datasets = {}
                for dataset_name, its_dataset in dataset_pool.datasets_in_pool().iteritems():
                    self.vardict[dataset_name] = its_dataset
                    datasets[dataset_name] = its_dataset
                    exec "%s=its_dataset" % dataset_name

                # This is needed. It resides in locals()
                # and is passed on to models as they run.
                ### TODO: There has got to be a better way!
                model_resources = Resources(datasets)
                n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(
                    models, models_configuration
                )
                self.run_year_namespace = locals()
                # ==========
                # Run the models.
                # ==========
                model_number = -1
                for model_entry in models:
                    # list 'models' can be in the form:
                    # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                    #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                    #                                      'commercial']}},
                    #  {'model_name_3': ['estimate', 'run']},
                    #  'model_name_4',
                    #  {'model_name_5': {'group_members': 'all'}}
                    # ]
                    # get the list of methods to be processed, possibly per group member
                    if isinstance(model_entry, dict):
                        model_name, value = model_entry.items()[0]
                        if not isinstance(value, dict):  # not a model group; value is the list of processes
                            processes = value
                            if not isinstance(processes, list):
                                processes = [processes]
                    else:  # in the form 'model_name_4' in the comment above
                        model_name = model_entry
                        processes = ["run"]
                    group_member = None
                    model_group = model_group_members_to_run[model_name][1]
                    last_member = max(1, len(model_group_members_to_run[model_name][0].keys()))
                    for imember in range(last_member):
                        controller_config = models_configuration[model_name]["controller"]
                        model_configuration = models_configuration[model_name]
                        if model_group_members_to_run[model_name][0].keys():
                            group_member_name = model_group_members_to_run[model_name][0].keys()[imember]
                            group_member = ModelGroupMember(model_group, group_member_name)
                            processes = model_group_members_to_run[model_name][0][group_member_name]
                            member_model_name = "%s_%s" % (group_member_name, model_name)
                            if member_model_name in models_configuration.keys():
                                model_configuration = models_configuration[member_model_name]
                                if "controller" in model_configuration.keys():
                                    controller_config = model_configuration["controller"]
                        datasets_to_preload_for_this_model = controller_config.get(
                            "_model_structure_dependencies_", {}
                        ).get("dataset", [])
                        for dataset_name in datasets_to_preload_for_this_model:
                            try:
                                if not dataset_pool.has_dataset(dataset_name) or (dataset_name not in datasets.keys()):
                                    ds = dataset_pool.get_dataset(dataset_name)
                                    self.vardict[dataset_name] = ds
                                    datasets[dataset_name] = ds
                                    exec "%s=ds" % dataset_name
                            except:
                                logger.log_warning("Failed to load dataset %s." % dataset_name)
                        # import part
                        if "import" in controller_config.keys():
                            import_config = controller_config["import"]
                            for import_module in import_config.keys():
                                exec ("from %s import %s" % (import_module, import_config[import_module]))

                        # gui_import_replacements part
                        # This is a temporary hack -- replicates the functionality of the "import" section
                        # for use with the GUI.  The content of this part of the config is a dictionary.
                        # Keys are names of models (not used here).  Values are 2 element pairs.
                        # The first element is a name and the second is a value.  Bind the name to the value.
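                        # A hypothetical entry (names invented purely for illustration):
                        #   "gui_import_replacements": {"my_model": ("my_debug_flag", True)}
                        # would bind the local name 'my_debug_flag' to the value True before the
                        # model is instantiated below.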
                        if "gui_import_replacements" in controller_config.keys():
                            import_replacement_config = controller_config["gui_import_replacements"]
                            for model_name in import_replacement_config.keys():
                                pair = import_replacement_config[model_name]
                                temp = pair[1]
                                exec ("%s = temp") % pair[0]

                        # init part
                        model = self.do_init(locals())

                        # estimate and/or run part
                        for process in processes:
                            model_number = model_number + 1
                            # write status file
                            model.set_model_system_status_parameters(
                                year, n_models, model_number, resources.get("status_file_for_gui", None)
                            )
                            model.write_status_for_gui()
                            # prepare part
                            exec (self.do_prepare(locals()))
                            processmodel_config = controller_config[process]
                            if "output" in processmodel_config.keys():
                                outputvar = processmodel_config["output"]
                            else:
                                outputvar = "process_output"
                            self.vardict[outputvar] = self.do_process(locals())
                            exec outputvar + "=self.vardict[outputvar]"

                            # check the command file from the GUI to see whether the simulation should be stopped or paused
                            self.do_commands_from_gui(resources.get("command_file_for_gui", None))

                            # capture namespace for interactive estimation
                            self.run_year_namespace = locals()
                            self.flush_datasets_after_model(resources)
                            del model
                            collect()

                # Write all datasets to cache.
                if write_datasets_to_cache_at_end_of_year:
                    logger.start_block("Writing datasets to cache for year %s" % year)
                    try:
                        for dataset_name, its_dataset in (
                            SessionConfiguration().get_dataset_pool().datasets_in_pool().iteritems()
                        ):
                            self.flush_dataset(its_dataset)
                    finally:
                        logger.end_block()

            finally:
                logger.end_block()
        finally:
            logger.disable_file_logging(log_file_name)

        if cleanup_datasets:
            SessionConfiguration().delete_datasets()

    def do_init(self, parent_state):
        """Run the 'init' part of this model's configuration.
        Returns model object.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> "self":
                exec ('%s = parent_state["%s"]' % (key, key))
        init_config = parent_state["controller_config"]["init"]
        group_member = parent_state["group_member"]
        if group_member is None:  # No model group
            cmd = "%s(%s)" % (init_config["name"], self.construct_arguments_from_config(init_config))
            model = eval(cmd)
        else:  # Model belongs to a group
            model = eval(
                "%s(group_member, %s)" % (init_config["name"], self.construct_arguments_from_config(init_config))
            )
        return model

    def do_prepare(self, parent_state):
        """Prepares for the current model in the parent state's context.
        What to do is determined by the contents of the current model's controller configuration.

        controller_config is the 'controller' part of the model configuration.
        vardict is a dictionary into which the output of the model's 'prepare_output'
        method will be put.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> "self":
                exec ('%s = parent_state["%s"]' % (key, key))
        key_name = "prepare_for_%s" % process
        if key_name in controller_config.keys():
            prepare_config = controller_config[key_name]
            if "output" in prepare_config.keys():
                outputvar = prepare_config["output"]
            else:
                outputvar = "prepare_output"
            self.vardict[outputvar] = eval(
                "model.%s(%s)" % (prepare_config["name"], self.construct_arguments_from_config(prepare_config))
            )
            return '%s=self.vardict["%s"]' % (outputvar, outputvar)
        else:
            # do nothing when return value is exec'ed
            return ""

    def do_process(self, parent_state):
        for key in parent_state.keys():
            if key <> "self":
                exec ('%s = parent_state["%s"]' % (key, key))
        ev = "model.%s(%s)" % (process, self.construct_arguments_from_config(processmodel_config))
        return eval(ev)

    def get_number_of_models_and_model_group_members_to_run(self, models, models_configuration):
        """Count number_of models in the list 'models' that can include group members (each member and each process is one model)."""
        # list models can be in the form:
        # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
        #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
        #                                      'commercial']}},
        #  {'model_name_3': ['estimate', 'run']},
        #  'model_name_4',
        #  {'model_name_5': {'group_members': '_all_'}}
        # ]
        number_of_models = 1
        model_group_members_to_run = {}
        for model_entry in models:
            if isinstance(model_entry, dict):
                model_name, value = model_entry.items()[0]
                if isinstance(value, dict):  # is a model group
                    if not value.keys()[0] == "group_members":
                        raise KeyError, "Key for model " + model_name + " must be 'group_members'."
                    group_members = value["group_members"]
                    model_group = None
                    if "group_by_attribute" in models_configuration[model_name]["controller"].keys():
                        group_dataset_name, group_attribute = models_configuration[model_name]["controller"][
                            "group_by_attribute"
                        ]
                        model_group = ModelGroup(
                            SessionConfiguration().get_dataset_from_pool(group_dataset_name), group_attribute
                        )
                    if not isinstance(group_members, list):
                        group_members = [group_members]
                    if group_members[0] == "_all_":  # see 'model_name_5' example above
                        if model_group is None:
                            raise KeyError, "Entry 'group_by_attribute' is missing for model %s" % model_name
                        group_members = model_group.get_member_names()
                    model_group_members_to_run[model_name] = [{}, model_group]
                    for member in group_members:
                        if isinstance(member, dict):
                            # see 'model_name_2' ('residential') in the comment above
                            member_name = member.keys()[0]
                            model_group_members_to_run[model_name][0][member_name] = member[member_name]
                            if not isinstance(model_group_members_to_run[model_name][0][member_name], list):
                                model_group_members_to_run[model_name][0][member_name] = [
                                    model_group_members_to_run[model_name][0][member_name]
                                ]
                            number_of_models += len(model_group_members_to_run[model_name][0][member_name])
                        else:  # see 'model_name_1'
                            model_group_members_to_run[model_name][0][member] = ["run"]
                            number_of_models += len(model_group_members_to_run[model_name][0][member])
                else:  # in the form 'model_name_3' in the comment above
                    model_group_members_to_run[model_name] = [{}, None]
                    if not isinstance(value, list):
                        number_of_models += 1
                    else:
                        number_of_models += len(value)
            else:  # in the form 'model_name_4' in the comment above
                model_group_members_to_run[model_entry] = [{}, None]
                number_of_models += 1
        return (number_of_models, model_group_members_to_run)

    def do_commands_from_gui(self, filename=None):
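        """Poll 'filename' (written by the GUI) for a one-word command:
        'stop' ends the run, 'resume' continues it, and anything else
        (including 'pause') keeps polling every 10 seconds."""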
        if (filename is None) or not os.path.exists(filename):
            return
        while True:
            f = file(filename)
            line = f.read().strip()
            f.close()
            if line == "stop":
                logger.log_warning("Simulation stopped.")
                sys.exit()
            elif line == "resume":
                break
            elif line <> "pause":
                logger.log_warning("Unknown command '%s'. Allowed commands: 'stop', 'pause', 'resume'." % line)
            time.sleep(10)

    def run_multiprocess(self, resources):
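        """Run each simulation year in a separate child process, seeding each year
        either from resources['_seed_dictionary_'] (set by the RunManager for restarted
        runs) or from random seeds derived from the root seed."""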
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        if resources["cache_directory"] is not None:
            cache_directory = resources["cache_directory"]
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is absolutely no good reason to be
        ###       changing the Configuration!
        resources["cache_directory"] = cache_directory

        log_file = os.path.join(cache_directory, "run_multiprocess.log")
        logger.enable_file_logging(log_file)

        start_year = resources["years"][0]
        end_year = resources["years"][-1]
        nyears = end_year - start_year + 1
        root_seed = resources.get("seed", NO_SEED)
        if resources.get("_seed_dictionary_", None) is not None:
            # This is added by the RunManager to ensure reproducibility including restarted runs
            seed_dict = resources.get("_seed_dictionary_")
            seed_array = array(map(lambda year: seed_dict[year], range(start_year, end_year + 1)))
        else:
            seed(root_seed)
            seed_array = randint(1, 2 ** 30, nyears)
        logger.log_status("Running simulation for years %d thru %d" % (start_year, end_year))
        logger.log_status("Simulation root seed: %s" % root_seed)

        for iyear, year in enumerate(range(start_year, end_year + 1)):
            success = self._run_each_year_as_separate_process(
                iyear, year, seed=seed_array[iyear], resources=resources, profiler_name=profiler_name, log_file=log_file
            )
            if not success:
                break

        self._notify_stopped()
        if profiler_name is not None:  # restore the original value
            resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" % (start_year, end_year))

    # TODO: changing of configuration
    def _run_each_year_as_separate_process(
        self, iyear, year, seed=None, resources=None, profiler_name=None, log_file=None
    ):

        logger.start_block("Running simulation for year %d in new process" % year)
        resources["years"] = (year, year)
        resources["seed"] = (seed,)

        if profiler_name is not None:
            # add year to the profile name
            resources["profile_filename"] = "%s_%s" % (profiler_name, year)

        optional_args = []
        if log_file:
            optional_args += ["--log-file-name", os.path.split(log_file)[-1]]

        success = False
        try:
            logger.disable_file_logging(log_file)
            success = self._fork_new_process(
                "opus_core.model_coordinators.model_system", resources, optional_args=optional_args
            )
            logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()

        return success

    def run_in_one_process(
        self, resources, run_in_background=False, class_path="opus_core.model_coordinators.model_system"
    ):
        resources = Resources(resources)
        if resources["cache_directory"] is not None:
            cache_directory = resources["cache_directory"]
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources["cache_directory"] = cache_directory

        self._fork_new_process("%s" % class_path, resources, delete_temp_dir=False, run_in_background=run_in_background)
        self._notify_stopped()

    def run_in_same_process(self, resources, **kwargs):
        resources = Resources(resources)
        if resources["cache_directory"] is not None:
            cache_directory = resources["cache_directory"]
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources["cache_directory"] = cache_directory

        self._notify_started()
        RunModelSystem(model_system=self, resources=resources, **kwargs)
        self._notify_stopped()

    def construct_arguments_from_config(self, config):
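        """Build a keyword-argument string from config["arguments"].
        For illustration, a hypothetical {"arguments": {"debuglevel": 4, "dataset_pool": "dataset_pool"}}
        yields "debuglevel=4, dataset_pool=dataset_pool, ", which do_init/do_process splice
        into an eval'ed call such as "MyModel(debuglevel=4, dataset_pool=dataset_pool, )"."""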
        key = "arguments"
        if (key not in config.keys()) or (len(config[key].keys()) <= 0):
            return ""
        arg_dict = config[key]
        result = ""
        for arg_key in arg_dict.keys():
            result += "%s=%s, " % (arg_key, arg_dict[arg_key])
        return result

    def wait_for_start(self):
        self.running_conditional.acquire()
        while not self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_finish(self):
        self.running_conditional.acquire()
        while self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_process_or_finish(self, process_index):
        self.running_conditional.acquire()
        while process_index >= len(self.forked_processes) and self.running:
            self.running_conditional.wait()
        self.running_conditional.release()
        if not self.running:
            process_index = len(self.forked_processes) - 1
        return process_index

    def _fork_new_process(self, module_name, resources, run_in_background=False, **key_args):
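        """Fork 'module_name' in a new process via ForkProcess and record it in
        self.forked_processes; unless run_in_background is set, wait for the child
        to finish and clean up. Returns the child's success flag."""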
        self.running_conditional.acquire()
        self.running = True
        self.forked_processes.append(ForkProcess())
        key_args["run_in_background"] = run_in_background
        success = self.forked_processes[-1].fork_new_process(module_name, resources, **key_args)
        self.running_conditional.notifyAll()
        self.running_conditional.release()
        if not run_in_background:
            self.forked_processes[-1].wait()
            self.forked_processes[-1].cleanup()
        return success

    def _notify_started(self):
        self.running_conditional.acquire()
        self.running = True
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def _notify_stopped(self):
        self.running_conditional.acquire()
        self.running = False
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def update_config_for_multiple_runs(self, config):
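        """For each model in config['models_with_sampled_coefficients'], swap in a
        '<model>_sampled_coef' variant for the year after the base year and configure its
        prepare_for_run step to sample coefficients (normal distribution, base-year cache)."""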
        models_to_update = config.get("models_with_sampled_coefficients", [])
        if "models_in_year" not in config.keys():
            config["models_in_year"] = {}
        if config["models_in_year"].get(config["base_year"] + 1, None) is None:
            config["models_in_year"][config["base_year"] + 1] = config.get("models")

        for umodel in models_to_update:
            new_model_name = "%s_sampled_coef" % umodel
            try:
                i = config["models_in_year"][config["base_year"] + 1].index(umodel)
                config["models_in_year"][config["base_year"] + 1][i] = new_model_name
            except:
                pass
            config["models_configuration"][new_model_name] = Configuration(config["models_configuration"][umodel])
            config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][
                "sample_coefficients"
            ] = True
            config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][
                "distribution"
            ] = "'normal'"
            config["models_configuration"][new_model_name]["controller"]["prepare_for_run"]["arguments"][
                "cache_storage"
            ] = "base_cache_storage"
Example n. 38
import os
import pickle
from opus_core.storage_factory import StorageFactory
from opus_core.paths import get_opus_data_path  # path assumed; the original 'get_opus_data_path_path' looks like a typo
from opus_core.simulation_state import SimulationState
from opus_core.session_configuration import SessionConfiguration
from opus_core.store.attribute_cache import AttributeCache

pickle_filename = '/workspace/price_equilibration/dump_cached.pickle'
pickle_data = pickle.load(open(pickle_filename, 'rb'))

pickle_filename = '/workspace/price_equilibration/submarket_id.pickle'
submkt_data = pickle.load(open(pickle_filename, 'rb'))

data_path = get_opus_data_path()
cache_dir = os.path.join(data_path, 'bay_area_zone/base_year_data.original')
year = 2000
simulation_state = SimulationState()
simulation_state.set_current_time(year)
SimulationState().set_cache_directory(cache_dir)
attribute_cache = AttributeCache()
dataset_pool = SessionConfiguration(new_instance=True,
                         package_order=['bayarea', 'urbansim_parcel', 
                                        'urbansim', 'opus_core'],
                         in_storage=attribute_cache
                        ).get_dataset_pool()

hh = dataset_pool.get_dataset('household')
proportion = float(pickle_data['Xpagents'].size) / hh.size()

hh_cnty = hh.compute_variables('county=household.disaggregate(zone.county, intermediates=[building])')
hh_bldg_type = hh.compute_variables('building_type_id=household.disaggregate(building.building_type_id)')
hh_df = hh.to_dataframe()
import pdb; pdb.set_trace()
Example n. 39
# NOTE: imports reconstructed so the test is self-contained; the opus_core module paths are assumed.
import os, tempfile
from numpy import array, ma
from opus_core.tests import opus_unittest
from opus_core.logger import logger
from opus_core.simulation_state import SimulationState
from opus_core.storage_factory import StorageFactory
from opus_core.datasets.dataset import Dataset

class TestDataset(opus_unittest.OpusTestCase):
    def setUp(self):
        self.start_year = 2001
        self.expected_sic_data = array([6,4,7,808,6])
        self.job_id = array([1,2,3,4,5])
        self.base_cache_dir = tempfile.mkdtemp(prefix='opus_tmp')
        self.simulation_state = SimulationState(low_memory_run=True, new_instance=True, base_cache_dir=self.base_cache_dir)
        self.dir = self.simulation_state.get_cache_directory()
        self.simulation_state.set_current_time(self.start_year)
        if not os.path.exists(self.dir):
            os.makedirs(self.dir)
        
        
    def tearDown(self):
        # the logger may have a file open in the cache directory; disable file logging so the directory can be removed
        if logger._file_stream:
            logger.disable_file_logging()
        
        for root, dirs, files in os.walk(self.dir, topdown=False):
            for filename in files:
                os.remove(os.path.join(root, filename))
            for directory in dirs:
                os.rmdir(os.path.join(root, directory))
        os.rmdir(self.dir)
        os.rmdir(self.base_cache_dir)
    
            
    def test_dict_dataset(self):
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='dataset',
            table_data={
                "id":array([1,2,3,4]), 
                "attr":array([4,7,2,1])
                }
            )
        
        ds = Dataset(in_storage=storage, in_table_name='dataset', id_name="id")
        
        self.assert_(ds.get_attribute("attr").sum()==14, "Something is wrong with the dataset.")
        self.assert_(ds.size()==4, "Wrong size of dataset.")
        
    def test_flt_dataset(self):
        import opus_core
        from opus_core.store.flt_storage import flt_storage
        
        attribute = 'little_endian'
        
        location = os.path.join(opus_core.__path__[0], 'data', 'flt')
        storage = flt_storage(storage_location=location)
        ds = Dataset(in_storage=storage, id_name=attribute, in_table_name='endians')
        
        self.assertAlmostEqual(11.0, ds.get_attribute_by_index(attribute, 0))
        self.assertEqual(None, ds.get_attribute_header(attribute))
 
    def test_join_by_rows(self):
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='dataset1', 
            table_data={    
                'id':array([2,4,6,8]), 
                'attr':array([4,7,2,1])
                }
            )
            
        storage.write_table(
            table_name='dataset2',
            table_data={
                'id':array([1,5,9]), 
                'attr':array([55,66,100])
                }
            )
        
        ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
        ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
        
        ds1.join_by_rows(ds2)
        self.assert_(ma.allclose(ds1.get_attribute('attr'), array([4,7,2,1,55,66,100])))
        self.assert_(ma.allclose(ds2.get_attribute('attr'), array([55,66,100])))
        
    def test_join_by_rows_for_unique_ids(self):
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='dataset1', 
            table_data={
                "id":array([2,4]), 
                "attr":array([4,7])
                }
            )
            
        storage.write_table(
            table_name='dataset2',
            table_data={
                "id":array([1,2]), 
                "attr":array([55,66])
                }
            )
        
        ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
        ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
        
        threw_exception = False
        try: 
            ds1.join_by_rows(ds2)
        except StandardError:
            threw_exception = True
        self.assert_(threw_exception)
        
    def test_join_by_rows_for_char_arrays(self):
        from numpy import alltrue
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='dataset1', 
            table_data={
                'id':array([2,4,6,8]), 
                'attr':array(['4','7','2','1'])
                }
            )
            
        storage.write_table(
            table_name='dataset2',
            table_data={
                'id':array([1,5,9]), 
                'attr':array(['55','66','100'])
                }
            )
        
        ds1 = Dataset(in_storage=storage, in_table_name='dataset1', id_name='id')
        ds2 = Dataset(in_storage=storage, in_table_name='dataset2', id_name='id')
        
        ds1.join_by_rows(ds2)
        self.assert_(alltrue(ds1.get_attribute('attr') == array(['4','7','2','1','55','66','100'])))
        self.assert_(alltrue(ds2.get_attribute('attr') == array(['55','66','100'])))
        
    def test_variable_dependencies_tree_with_versioning(self):
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='tests',
            table_data={
                'id':array([2,4]), 
                'a_dependent_variable':array([4,7]),
                'a_dependent_variable2':array([10,1])
                }
            )
        
        ds = Dataset(in_storage=storage, in_table_name='tests', id_name='id', dataset_name='tests')
        
        ds.compute_variables(["opus_core.tests.a_test_variable_with_two_dependencies"])
        
        self.assert_(ds.get_version("a_test_variable_with_two_dependencies")==0) #initially version=0
        self.assert_(ds.get_version("a_dependent_variable")==0)
        self.assert_(ds.get_version("a_dependent_variable2")==0)
        
        ds.modify_attribute("a_dependent_variable", array([0,0]))
        self.assert_(ds.get_version("a_dependent_variable")==1) # version=1
        
        ds.modify_attribute("a_dependent_variable", array([1,1]))
        self.assert_(ds.get_version("a_dependent_variable")==2) # version=2
        
        ds.compute_variables(["opus_core.tests.a_test_variable_with_two_dependencies"])
        self.assert_(ds.get_version("a_test_variable_with_two_dependencies")==1)
        
        ds.compute_variables(["opus_core.tests.a_test_variable_with_two_dependencies"])
        self.assert_(ds.get_version("a_test_variable_with_two_dependencies")==1) # version does not change
        
        autogen_variable = "my_var = 3 * opus_core.tests.a_dependent_variable"
        ds.compute_variables([autogen_variable])
        self.assert_(ds.get_version("my_var")==0)
        ds.compute_variables([autogen_variable])
        self.assert_(ds.get_version("my_var")==0)
        
    def test_compute_variable_with_unknown_package(self):
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='tests',
            table_data={
                'id':array([2,4]), 
                'attr1':array([4,7]),
                }
            )
        
        ds = Dataset(in_storage=storage, in_table_name='tests', id_name='id', dataset_name='test')
        
        ds.compute_one_variable_with_unknown_package("attr1_times_2", package_order=["opus_core"])
        
    def test_join_datasets_with_2_ids(self):
        from numpy import ma
        storage = StorageFactory().get_storage('dict_storage')
        
        storage.write_table(
            table_name='data1',
            table_data={
                'id1':array([2,4,2]),
                'id2':array([1,2,3]),
                'attr1':array([4,7,1]),
                'attr2':array([100,0,1000]),
                }
            )
        storage.write_table(
            table_name='data2',
            table_data={
                'id1':array([4,2,2]),
                'id2':array([2,3,1]),
                'attr1':array([50,60,70])
                }
            )
        
        ds1 = Dataset(in_storage=storage, in_table_name='data1', id_name=['id1', 'id2'], dataset_name='data1')
        ds2 = Dataset(in_storage=storage, in_table_name='data2', id_name=['id1', 'id2'], dataset_name='data2')
        ds1.join(ds2, 'attr1')
        self.assertEqual(ma.allequal(ds1.get_attribute('attr1'), array([70,50,60])), True)
        self.assertEqual(ma.allequal(ds1.get_attribute('attr2'), array([100,0,1000])), True)
Example n. 40
if __name__ == "__main__":    
    parser = OptionParser()
    parser.add_option("-a", "--augment-variables", dest="augment_variables", action="store_true",
                      help="model name")
    parser.add_option("-c", "--indicator-config", dest="indicator_config", action="store", 
                      type="string", help="model name")
    parser.add_option("-y", "--year", dest="year", action="store", type="int",
                      help="end year")
    (options, args) = parser.parse_args()

    runs = Runs.runs
    comparison_variables = Runs.comparison_variables
    baseline = Runs.baseline
    
    simulation_state = SimulationState()
    simulation_state.set_current_time(options.year)
    
    SessionConfiguration(new_instance=True,
                         package_order=['psrc','urbansim','opus_core'],
                         in_storage=AttributeCache())

    if options.augment_variables:
        for dataset_name in comparison_variables.keys():
            cache_directory = baseline
            simulation_state.set_cache_directory(cache_directory)
            dataset = DatasetFactory().get_dataset(dataset_name,
                                                    package='urbansim', 
                                                    arguments={'in_storage': AttributeCache()})
        
            variables = comparison_variables[dataset_name]
            dataset.compute_variables(variables, resources=Resources())
    def run(self, base_directory, urbansim_cache_directory, years, output_directory, temp_folder,
            coefficients_name, specification_name, convert_flt=True, convert_input=False):
        """ run the simulation
                base_directory: directory contains all years folder of lccm.
                urbansim_cache_directory: directory contains all years folder of urbansim cache.
                years: lists of year to run."""
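        # A call might look like this (paths and table names are purely illustrative):
        #   self.run(base_directory='/data/lccm_cache', urbansim_cache_directory='/data/urbansim_cache',
        #            years=[2001, 2002], output_directory='/data/lccm_output', temp_folder=None,
        #            coefficients_name='land_cover_change_model_coefficients',
        #            specification_name='land_cover_change_model_specification')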
        model = LandCoverChangeModel(self.possible_lcts, submodel_string=self.lct_attribute, 
                                     choice_attribute_name=self.lct_attribute, debuglevel=4)
        coefficients = Coefficients()
        storage = StorageFactory().get_storage('tab_storage', 
            storage_location=os.path.join(self.package_path, 'data'))
        coefficients.load(in_storage=storage, in_table_name=coefficients_name)
        specification = EquationSpecification(in_storage=storage)
        specification.load(in_table_name=specification_name)
        specification.set_variable_prefix("biocomplexity.land_cover.")
        constants = Constants()
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(urbansim_cache_directory)
        attribute_cache = AttributeCache()
        SessionConfiguration(new_instance=True,
                             package_order=['biocomplexity', 'urbansim', 'opus_core'],
                             in_storage=AttributeCache())
                
        ncols = LccmConfiguration.ncols        
        
        if temp_folder is None:
            self.temp_land_cover_dir = tempfile.mkdtemp()
        else:
            self.temp_land_cover_dir = temp_folder
        
        for year in years:
            land_cover_path = self._generate_input_land_cover(year, base_directory, urbansim_cache_directory, 
                                                              years, output_directory, convert_flt, convert_input)
            #max_size = 174338406 (orig) - act. int: 19019944 (37632028 incl NoData)
            max_size = self._get_max_index(land_cover_path) # 1st instance of lc_dataset - but looks like a 'lite' version
            offset = min(LccmConfiguration.offset, max_size)
            s = 0
            t = offset
            while (s < t and t <= max_size):
                logger.log_status("Offset: ", s, t)
                index = arange(s,t)
                
                land_cover_cache_path=os.path.join(urbansim_cache_directory,str(year),'land_covers')
                self._clean_up_land_cover_cache(land_cover_cache_path)
                
                simulation_state.set_current_time(year)
                
                # 2nd instance of lc_dataset
                land_covers = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path),
                                           out_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path),
                                           debuglevel=4)
                land_covers.subset_by_index(index)
#                land_covers.load_dataset()
                gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4)

                agents_index = None
                model.run(specification, coefficients, land_covers, data_objects={"gridcell":gridcells,
                              "constants":constants, "flush_variables":True},
                              chunk_specification = {'nchunks':5}) ## chunk size set here
                land_covers.flush_dataset()
                del gridcells
                del land_covers

#                self._generate_output_flt(year, urbansim_cache_directory, output_directory, convert_flt)
                self._generate_output_flt2(year, urbansim_cache_directory, output_directory, convert_flt)
                
                if t >= max_size: break
                s = max(t-10*ncols,s)
                t = min(t+offset-10*ncols,max_size)
                
        # clean up temporary storage after the simulation is done
        shutil.rmtree(self.temp_land_cover_dir)
Example n. 42
class ModelSystem(object):
    """
    Uses the information in configuration to run/estimate a set of models for given set of years.
    """
    def __init__(self):
        self.running = False
        self.forked_processes = []
        self.running_conditional = threading.Condition()

    def run(self,
            resources,
            write_datasets_to_cache_at_end_of_year=True,
            log_file_name='run_model_system.log',
            cleanup_datasets=True):
        """Entries in resources: (entries with no defaults are required)
               models - a list containing names of models to be run. Each name
                           must correspond to the name of the module/class of that model. Default: None
               years - a tuple (start year, end year)
               debuglevel - an integer. The higher the value, the more output is printed. Default: 0
               expression_library - a dictionary.  The keys in the dictionary are pairs (dataset_name, variable_name)
               and the values are the corresponding expressions.  The model system needs to set the expression library
               (if it isn't None) in DatasetFactory for DatasetFactory to know about variables defined as expressions
               in the xml expression library.  Default: None
        This method is called both to start up the simulation for all years, and also for each year
        when running with one process per year.  In the latter case, 'years' consists of just
        (current_year, current_year) rather than the real start and end years for the simulation.
        """
        if not isinstance(resources, Resources):
            raise TypeError, "Argument 'resources' must be of type 'Resources'."
        logger_settings = resources.get("log", {
            "tags": [],
            "verbosity_level": 3
        })
        logger.set_tags(logger_settings.get("tags", []))
        logger.set_verbosity_level(logger_settings.get("verbosity_level", 3))
        self.simulation_state = SimulationState()
        self.simulation_state.set_low_memory_run(
            resources.get("low_memory_mode", False))
        self.simulation_state.set_start_time(resources.get("base_year", 0))
        self.run_year_namespace = {}

        if resources.get('cache_directory', None) is not None:
            self.simulation_state.set_cache_directory(
                resources['cache_directory'])

        if 'expression_library' in resources:
            VariableFactory().set_expression_library(
                resources['expression_library'])

        if resources.get('sample_input', False):
            self.update_config_for_multiple_runs(resources)

        cache_directory = self.simulation_state.get_cache_directory()
        log_file = os.path.join(cache_directory, log_file_name)
        logger.enable_file_logging(log_file, verbose=False)
        try:
            logger.log_status("Cache Directory set to: " + cache_directory)

            with logger.block('Start simulation run'):
                models = resources.get("models", [])
                models_in_years = resources.get("models_in_year", {})

                resources.check_obligatory_keys(["years"])

                years = resources["years"]
                if (not isinstance(years, tuple)) and (not isinstance(years, list)):
                    raise TypeError, "Entry 'years' in resources must be a tuple or a list."

                if len(years) < 2:
                    print years
                    raise StandardError, "Entry 'years' in resources must be of length at least 2."

                start_year = years[0]
                end_year = years[-1]

                debuglevel = resources.get("debuglevel", 0)
                seed_values = resources.get('seed', NO_SEED)

                logger.log_status("random seed = %s" % str(seed_values))
                seed(seed_values)

                for year in range(start_year, end_year + 1):
                    with logger.block("Starting simulation for year " +
                                      str(year)):
                        self.simulation_state.set_current_time(year)
                        SessionConfiguration().get_dataset_pool().remove_all_datasets()
                        logger.disable_file_logging(log_file)
                        try:
                            if models_in_years.get(year, None) is not None:
                                models_to_run = models_in_years[year]
                            else:
                                models_to_run = models
                            self._run_year(
                                year=year,
                                models=models_to_run,
                                simulation_state=self.simulation_state,
                                debuglevel=debuglevel,
                                resources=resources,
                                write_datasets_to_cache_at_end_of_year=write_datasets_to_cache_at_end_of_year,
                                cleanup_datasets=cleanup_datasets)
                        finally:
                            logger.enable_file_logging(log_file, verbose=False)
                        collect()

        finally:
            logger.disable_file_logging(log_file)

    def flush_datasets(self, dataset_names, after_model=False):
        dataset_pool = SessionConfiguration().get_dataset_pool()
        for dataset_name in dataset_names:
            if dataset_pool.has_dataset(dataset_name):
                self.flush_dataset(dataset_pool.get_dataset(dataset_name),
                                   after_model=after_model)

    def flush_dataset(self, dataset, after_model=False):
        """Write the PRIMARY attributes of this dataset to the cache."""
        if dataset and isinstance(dataset, Dataset):
            # Do not flush after model if not necessary
            if after_model:
                if len(dataset.get_attribute_names()) <= len(
                        dataset.get_id_name()):
                    return
                if (len(dataset.get_attribute_names()) == len(dataset.get_known_attribute_names())) and \
                                         (len(dataset.get_attributes_in_memory()) <= len(dataset.get_id_name())):
                    dataset.delete_computed_attributes()
                    return
            dataset.delete_computed_attributes()
            dataset.load_and_flush_dataset()

    def flush_datasets_after_model(self, resources):
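        """Flush the datasets listed in resources['datasets_to_cache_after_each_model'];
        if 'flush_variables' is set, delete computed tables from the cache and flush every
        dataset currently in the pool instead."""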
        if resources.get('flush_variables', False):
            AttributeCache().delete_computed_tables()
            # this will also delete computed attributes
            datasets_to_cache = SessionConfiguration().get_dataset_pool().datasets_in_pool().keys()
        else:
            datasets_to_cache = resources.get(
                "datasets_to_cache_after_each_model", [])
        self.flush_datasets(datasets_to_cache, after_model=True)

    def _run_year(self,
                  year,
                  models,
                  simulation_state,
                  debuglevel,
                  resources,
                  write_datasets_to_cache_at_end_of_year,
                  cleanup_datasets=True):
        """
        Assumes that all datasets reside in the cache directory in binary format.
        """
        try:
            import wingdbstub
        except:
            pass
        self.vardict = {}
        log_file_name = os.path.join(simulation_state.get_cache_directory(),
                                     "year_%s_log.txt" % year)
        logger.enable_file_logging(log_file_name, 'w')
        try:
            logger.start_block('Simulate year %s' % year)
            try:
                base_year = resources['base_year']
                if year == base_year:
                    year_for_base_year_cache = year  # case of estimation
                else:
                    year_for_base_year_cache = year - 1
                cache_storage = AttributeCache().get_flt_storage_for_year(
                    year_for_base_year_cache)
                self.vardict['cache_storage'] = cache_storage
                base_cache_storage = AttributeCache().get_flt_storage_for_year(
                    base_year)
                self.vardict['base_cache_storage'] = base_cache_storage
                simulation_state.set_flush_datasets(
                    resources.get("flush_variables", False))
                SessionConfiguration()["simulation_year"] = year
                SessionConfiguration()["debuglevel"] = debuglevel
                datasets_to_preload_in_year = resources.get(
                    'datasets_to_preload_in_year', {})
                if datasets_to_preload_in_year.get(year, None) is not None:
                    datasets_to_preload = datasets_to_preload_in_year[year]
                else:
                    datasets_to_preload = resources.get(
                        'datasets_to_preload', {})
                for dataset_name in datasets_to_preload:
                    SessionConfiguration().get_dataset_from_pool(dataset_name)
                models_configuration = resources.get('models_configuration',
                                                     {})
                dataset_pool = SessionConfiguration().get_dataset_pool()
                datasets = {}
                for dataset_name, its_dataset in dataset_pool.datasets_in_pool().iteritems():
                    self.vardict[dataset_name] = its_dataset
                    datasets[dataset_name] = its_dataset
                    exec '%s=its_dataset' % dataset_name

                # This is needed. It resides in locals()
                # and is passed on to models as they run.
                ### TODO: There has got to be a better way!
                model_resources = Resources(datasets)
                n_models, model_group_members_to_run = self.get_number_of_models_and_model_group_members_to_run(
                    models, models_configuration)
                self.run_year_namespace = locals()
                #==========
                # Run the models.
                #==========
                model_number = -1
                for model_entry in models:
                    # list 'models' can be in the form:
                    # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
                    #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
                    #                                      'commercial']}},
                    #  {'model_name_3': ['estimate', 'run']},
                    #  'model_name_4',
                    #  {'model_name_5': {'group_members': '_all_'}}
                    # ]
                    # get the list of methods to be processed, possibly for each group member
                    if isinstance(model_entry, dict):
                        model_name, value = model_entry.items()[0]
                        if not isinstance(value, dict):  # not a model group; value is a list of processes
                            processes = value
                            if not isinstance(processes, list):
                                processes = [processes]
                    else:  # in the form 'model_name_4' in the comment above
                        model_name = model_entry
                        processes = ["run"]
                    group_member = None
                    model_group = model_group_members_to_run[model_name][1]
                    last_member = max(
                        1,
                        len(model_group_members_to_run[model_name][0].keys()))
                    for imember in range(last_member):
                        controller_config = models_configuration[model_name][
                            "controller"]
                        model_configuration = models_configuration[model_name]
                        if model_group_members_to_run[model_name][0].keys():
                            group_member_name = model_group_members_to_run[
                                model_name][0].keys()[imember]
                            group_member = ModelGroupMember(
                                model_group, group_member_name)
                            processes = model_group_members_to_run[model_name][
                                0][group_member_name]
                            member_model_name = "%s_%s" % (group_member_name,
                                                           model_name)
                            if member_model_name in models_configuration.keys():
                                model_configuration = models_configuration[
                                    member_model_name]
                                if "controller" in model_configuration.keys():
                                    controller_config = model_configuration[
                                        "controller"]
                        datasets_to_preload_for_this_model = controller_config.get(
                            '_model_structure_dependencies_',
                            {}).get('dataset', [])
                        for dataset_name in datasets_to_preload_for_this_model:
                            try:
                                if not dataset_pool.has_dataset(dataset_name) or (dataset_name not in datasets.keys()):
                                    ds = dataset_pool.get_dataset(dataset_name)
                                    self.vardict[dataset_name] = ds
                                    datasets[dataset_name] = ds
                                    exec '%s=ds' % dataset_name
                            except:
                                logger.log_warning(
                                    'Failed to load dataset %s.' %
                                    dataset_name)
                        # import part
                        if "import" in controller_config.keys():
                            import_config = controller_config["import"]
                            for import_module in import_config.keys():
                                exec("from %s import %s" %
                                     (import_module,
                                      import_config[import_module]))

                        # gui_import_replacements part
                        # This is a temporary hack -- replicates the functionality of the "import" section
                        # for use with the GUI.  The content of this part of the config is a dictionary.
                        # Keys are names of models (not used here).  Values are 2 element pairs.
                        # The first element is a name and the second is a value.  Bind the name to the value.
                        if "gui_import_replacements" in controller_config.keys(
                        ):
                            import_replacement_config = controller_config[
                                "gui_import_replacements"]
                            for model_name in import_replacement_config.keys():
                                pair = import_replacement_config[model_name]
                                temp = pair[1]
                                exec("%s = temp") % pair[0]

                        # init part
                        model = self.do_init(locals())

                        # estimate and/or run part
                        for process in processes:
                            model_number = model_number + 1
                            # write status file
                            model.set_model_system_status_parameters(
                                year, n_models, model_number,
                                resources.get('status_file_for_gui', None))
                            model.write_status_for_gui()
                            # prepare part
                            exec(self.do_prepare(locals()))
                            processmodel_config = controller_config[process]
                            if "output" in processmodel_config.keys():
                                outputvar = processmodel_config["output"]
                            else:
                                outputvar = "process_output"
                            self.vardict[outputvar] = self.do_process(locals())
                            exec outputvar + '=self.vardict[outputvar]'

                            # check the command file from the GUI to see whether the simulation should be stopped or paused
                            self.do_commands_from_gui(
                                resources.get('command_file_for_gui', None))

                            # capture namespace for interactive estimation
                            self.run_year_namespace = locals()
                            self.flush_datasets_after_model(resources)
                            del model
                            collect()

                # Write all datasets to cache.
                if write_datasets_to_cache_at_end_of_year:
                    logger.start_block(
                        'Writing datasets to cache for year %s' % year)
                    try:
                        for dataset_name, its_dataset in SessionConfiguration().get_dataset_pool().datasets_in_pool().iteritems():
                            self.flush_dataset(its_dataset)
                    finally:
                        logger.end_block()

            finally:
                logger.end_block()
        finally:
            logger.disable_file_logging(log_file_name)

        if cleanup_datasets:
            SessionConfiguration().delete_datasets()

    def do_init(self, parent_state):
        """Run the 'init' part of this model's configuration.
        Returns model object.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        init_config = parent_state['controller_config']["init"]
        group_member = parent_state['group_member']
        if group_member is None:  # No model group
            cmd = "%s(%s)" % (
                init_config["name"],
                self.construct_arguments_from_config(init_config))
            model = eval(cmd)
        else:  # Model belongs to a group
            model = eval("%s(group_member, %s)" %
                         (init_config["name"],
                          self.construct_arguments_from_config(init_config)))
        return model

    def do_prepare(self, parent_state):
        """Prepares for the current model in the parent state's context.
        What to do is determined by the contents of the current model's controller configuration.

        controller_config is the 'controller' part of the model configuration.
        vardict is a dictionary into which the output of the model's 'prepare_output'
        method will be put.
        """
        # give this method the same local variables as its calling method has.
        for key in parent_state.keys():
            if key <> 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        key_name = "prepare_for_%s" % process
        if key_name in controller_config.keys():
            prepare_config = controller_config[key_name]
            if "output" in prepare_config.keys():
                outputvar = prepare_config["output"]
            else:
                outputvar = "prepare_output"
            self.vardict[outputvar] = eval(
                "model.%s(%s)" %
                (prepare_config["name"],
                 self.construct_arguments_from_config(prepare_config)))
            return '%s=self.vardict["%s"]' % (outputvar, outputvar)
        else:
            # do nothing when return value is exec'ed
            return ''

    def do_process(self, parent_state):
        for key in parent_state.keys():
            if key <> 'self':
                exec('%s = parent_state["%s"]' % (key, key))
        ev = "model.%s(%s)" % (
            process, self.construct_arguments_from_config(processmodel_config))
        return eval(ev)

    def get_number_of_models_and_model_group_members_to_run(
            self, models, models_configuration):
        """Count number_of models in the list 'models' that can include group members (each member and each process is one model)."""
        # list models can be in the form:
        # [{'model_name_1': {'group_members': ['residential', 'commercial']}},
        #  {'model_name_2': {'group_members': [{'residential': ['estimate','run']},
        #                                      'commercial']}},
        #  {'model_name_3': ['estimate', 'run']},
        #  'model_name_4',
        #  {'model_name_5': {'group_members': '_all_'}}
        # ]
        number_of_models = 1
        model_group_members_to_run = {}
        for model_entry in models:
            if isinstance(model_entry, dict):
                model_name, value = model_entry.items()[0]
                if isinstance(value, dict):  # is a model group
                    if not value.keys()[0] == "group_members":
                        raise KeyError("Key for model " + model_name + " must be 'group_members'.")
                    group_members = value["group_members"]
                    model_group = None
                    if 'group_by_attribute' in models_configuration[
                            model_name]["controller"].keys():
                        group_dataset_name, group_attribute = models_configuration[
                            model_name]["controller"]['group_by_attribute']
                        model_group = ModelGroup(
                            SessionConfiguration().get_dataset_from_pool(
                                group_dataset_name), group_attribute)
                    if not isinstance(group_members, list):
                        group_members = [group_members]
                    if group_members[
                            0] == "_all_":  # see 'model_name_5' example above
                        if model_group is None:
                            raise KeyError("Entry 'group_by_attribute' is missing for model %s" % model_name)
                        group_members = model_group.get_member_names()
                    model_group_members_to_run[model_name] = [{}, model_group]
                    for member in group_members:
                        if isinstance(member, dict):
                            # see 'model_name_2' ('residential') in the comment above
                            member_name = member.keys()[0]
                            model_group_members_to_run[model_name][0][
                                member_name] = member[member_name]
                            if not isinstance(
                                    model_group_members_to_run[model_name][0]
                                [member_name], list):
                                model_group_members_to_run[model_name][0][
                                    member_name] = [
                                        model_group_members_to_run[model_name]
                                        [0][member_name]
                                    ]
                            number_of_models += len(
                                model_group_members_to_run[model_name][0]
                                [member_name])
                        else:  # see 'model_name_1'
                            model_group_members_to_run[model_name][0][
                                member] = ["run"]
                            number_of_models += len(
                                model_group_members_to_run[model_name][0]
                                [member])
                else:  # in the form 'model_name_3' in the comment above
                    model_group_members_to_run[model_name] = [{}, None]
                    if not isinstance(value, list):
                        number_of_models += 1
                    else:
                        number_of_models += len(value)
            else:  # in the form 'model_name_4' in the comment above
                model_group_members_to_run[model_entry] = [{}, None]
                number_of_models += 1
        return (number_of_models, model_group_members_to_run)
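
    # Worked example (no model groups involved): for
    #     models = ['model_name_4', {'model_name_3': ['estimate', 'run']}]
    # this method returns
    #     (4, {'model_name_4': [{}, None], 'model_name_3': [{}, None]})
    # The count starts at 1 and each process of each (group member of each)
    # model adds one, so it is one larger than the number of model runs.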

    def do_commands_from_gui(self, filename=None):
        if (filename is None) or not os.path.exists(filename):
            return
        while True:
            f = open(filename)
            line = f.read().strip()
            f.close()
            if line == 'stop':
                logger.log_warning('Simulation stopped.')
                sys.exit()
            elif line == 'resume':
                break
            elif line != 'pause':
                logger.log_warning(
                    "Unknown command '%s'. Allowed commands: 'stop', 'pause', 'resume'."
                    % line)
            time.sleep(10)
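
    # If the command file exists when this method is called, it is re-read
    # roughly every 10 seconds until it contains 'resume' (continue) or 'stop'
    # (log a warning and exit); 'pause' keeps the simulation waiting. A GUI or
    # a shell can drive this by rewriting the file, e.g. (hypothetical path):
    #     echo pause > /tmp/opus_gui_command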

    def run_multiprocess(self, resources):
        resources = Resources(resources)
        profiler_name = resources.get("profile_filename", None)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is absolutely no good reason to be
        ###       changing the Configuration!
        resources['cache_directory'] = cache_directory

        log_file = os.path.join(cache_directory, 'run_multiprocess.log')
        logger.enable_file_logging(log_file)

        start_year = resources["years"][0]
        end_year = resources["years"][-1]
        nyears = end_year - start_year + 1
        root_seed = resources.get("seed", NO_SEED)
        if resources.get('_seed_dictionary_', None) is not None:
            # This is added by the RunManager to ensure reproducibility including restarted runs
            seed_dict = resources.get('_seed_dictionary_')
            seed_array = array(
                [seed_dict[y] for y in range(start_year, end_year + 1)])
        else:
            seed(root_seed)
            seed_array = randint(1, 2**30, nyears)
        logger.log_status("Running simulation for years %d thru %d" %
                          (start_year, end_year))
        logger.log_status("Simulation root seed: %s" % root_seed)

        for iyear, year in enumerate(range(start_year, end_year + 1)):
            success = self._run_each_year_as_separate_process(
                iyear,
                year,
                seed=seed_array[iyear],
                resources=resources,
                profiler_name=profiler_name,
                log_file=log_file)
            if not success:
                break

        self._notify_stopped()
        if profiler_name is not None:  # insert original value
            resources["profile_filename"] = profiler_name
        logger.log_status("Done running simulation for years %d thru %d" %
                          (start_year, end_year))
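
    # Seed handling sketch (hypothetical values): with resources['years'] ==
    # [2001, 2003] and a RunManager-provided _seed_dictionary_ of
    # {2001: 5, 2002: 9, 2003: 2}, seed_array becomes array([5, 9, 2]) and the
    # forked process for 2002 is started with seed=9; without the dictionary,
    # the root seed is used to draw one random per-year seed instead.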

    #TODO: changing of configuration
    def _run_each_year_as_separate_process(self,
                                           iyear,
                                           year,
                                           seed=None,
                                           resources=None,
                                           profiler_name=None,
                                           log_file=None):

        logger.start_block('Running simulation for year %d in new process' %
                           year)
        resources['years'] = (year, year)
        resources['seed'] = seed

        if profiler_name is not None:
            # add year to the profile name
            resources["profile_filename"] = "%s_%s" % (profiler_name, year)

        optional_args = []
        if log_file:
            optional_args += ['--log-file-name', os.path.split(log_file)[-1]]

        success = False
        try:
            logger.disable_file_logging(log_file)
            success = self._fork_new_process(
                'opus_core.model_coordinators.model_system',
                resources,
                optional_args=optional_args)
            logger.enable_file_logging(log_file, verbose=False)
        finally:
            logger.end_block()

        return success

    def run_in_one_process(
            self,
            resources,
            run_in_background=False,
            class_path='opus_core.model_coordinators.model_system'):
        resources = Resources(resources)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources['cache_directory'] = cache_directory

        self._fork_new_process('%s' % class_path,
                               resources,
                               delete_temp_dir=False,
                               run_in_background=run_in_background)
        self._notify_stopped()

    def run_in_same_process(self, resources, **kwargs):
        resources = Resources(resources)
        if resources['cache_directory'] is not None:
            cache_directory = resources['cache_directory']
        else:
            cache_directory = SimulationState().get_cache_directory()

        ### TODO: Get rid of this! There is no good reason to be changing the
        ###       Configuration.
        resources['cache_directory'] = cache_directory

        self._notify_started()
        RunModelSystem(model_system=self, resources=resources, **kwargs)
        self._notify_stopped()

    def construct_arguments_from_config(self, config):
        key = "arguments"
        if (key not in config.keys()) or (len(config[key].keys()) <= 0):
            return ""
        arg_dict = config[key]
        result = ""
        for arg_key in arg_dict.keys():
            result += "%s=%s, " % (arg_key, arg_dict[arg_key])
        return result
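
    # Example: {'arguments': {'debuglevel': 4, 'distribution': "'normal'"}}
    # yields "debuglevel=4, distribution='normal', " (dictionary order is not
    # guaranteed); values are interpolated verbatim into the eval'ed call, so
    # string literals must carry their own quotes, as "'normal'" does here.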

    def wait_for_start(self):
        self.running_conditional.acquire()
        while not self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_finish(self):
        self.running_conditional.acquire()
        while self.running:
            self.running_conditional.wait()
        self.running_conditional.release()

    def wait_for_process_or_finish(self, process_index):
        self.running_conditional.acquire()
        while process_index >= len(self.forked_processes) and self.running:
            self.running_conditional.wait()
        self.running_conditional.release()
        if not self.running:
            process_index = len(self.forked_processes) - 1
        return process_index

    def _fork_new_process(self,
                          module_name,
                          resources,
                          run_in_background=False,
                          **key_args):
        self.running_conditional.acquire()
        self.running = True
        self.forked_processes.append(ForkProcess())
        key_args["run_in_background"] = run_in_background
        success = self.forked_processes[-1].fork_new_process(
            module_name, resources, **key_args)
        self.running_conditional.notifyAll()
        self.running_conditional.release()
        if not run_in_background:
            self.forked_processes[-1].wait()
            self.forked_processes[-1].cleanup()
        return success

    def _notify_started(self):
        self.running_conditional.acquire()
        self.running = True
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def _notify_stopped(self):
        self.running_conditional.acquire()
        self.running = False
        self.running_conditional.notifyAll()
        self.running_conditional.release()

    def update_config_for_multiple_runs(self, config):
        models_to_update = config.get('models_with_sampled_coefficients', [])
        if 'models_in_year' not in config.keys():
            config['models_in_year'] = {}
        if config['models_in_year'].get(config['base_year'] + 1, None) is None:
            config['models_in_year'][config['base_year'] +
                                     1] = config.get('models')

        for umodel in models_to_update:
            new_model_name = '%s_sampled_coef' % umodel
            try:
                i = config['models_in_year'][config['base_year'] +
                                             1].index(umodel)
                config['models_in_year'][config['base_year'] +
                                         1][i] = new_model_name
            except ValueError:
                # the model is not listed for the first simulated year; still
                # create its sampled-coefficients configuration below
                pass
            config["models_configuration"][new_model_name] = Configuration(
                config["models_configuration"][umodel])
            config["models_configuration"][new_model_name]["controller"][
                "prepare_for_run"]["arguments"]["sample_coefficients"] = True
            config["models_configuration"][new_model_name]["controller"][
                "prepare_for_run"]["arguments"]["distribution"] = "'normal'"
            config["models_configuration"][new_model_name]["controller"][
                "prepare_for_run"]["arguments"][
                    "cache_storage"] = "base_cache_storage"
Example no. 43
if __name__ == "__main__":    
    parser = OptionParser()
    parser.add_option("-a", "--augment-variables", dest="augment_variables", action="store_true",
                      help="model name")
    parser.add_option("-c", "--indicator-config", dest="indicator_config", action="store", 
                      type="string", help="model name")
    parser.add_option("-y", "--year", dest="year", action="store", type="int",
                      help="end year")
    (options, args) = parser.parse_args()

    runs = Runs.runs
    comparison_variables = Runs.comparison_variables
    baseline = Runs.baseline
    
    simulation_state = SimulationState()
    simulation_state.set_current_time(options.year)
    
    SessionConfiguration(new_instance=True,
                         package_order=['psrc','urbansim','opus_core'],
                         in_storage=AttributeCache())

    if options.augment_variables:
        for dataset_name in comparison_variables.keys():
            cache_directory = baseline
            simulation_state.set_cache_directory(cache_directory)
            dataset = DatasetFactory().get_dataset(dataset_name,
                                                    package='urbansim', 
                                                    arguments={'in_storage': AttributeCache()})
        
            variables = comparison_variables[dataset_name]
            dataset.compute_variables(variables, resources=Resources())
Example no. 44
        else:
            cache_directory = _cache_directory

    try:
        year = int(options.year)
    except (TypeError, ValueError):
        # parser.error() prints the usage message and exits with a non-zero status
        parser.error("year must be provided.")

    if package_order is None:
        package_order = eval(options.package_order)


    st = SimulationState()
    st.set_current_time(year)
    st.set_cache_directory(cache_directory)
    attribute_cache = AttributeCache()
    dp = SessionConfiguration(new_instance=True,
                              package_order=package_order,
                              in_storage=attribute_cache
                              ).get_dataset_pool()

    
    ## example usage:
    # python -i explore_run_cache.py -p bay_area_parcel -r 105 2025
    # >>> h2025 = dp.get_dataset('household')
    # >>> children_5yr = h2025.compute_variables('household.aggregate(person.age <= 5)')

    # python -d /workspace/opus/data/bay_area_parcel/base_year_data 2010
    # >>> h2010 = dp.get_dataset('household')
Example no. 45
    def run(self, resources, year):
        cache_directory = resources['cache_directory']
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(cache_directory)
        simulation_state.set_current_time(year)
        attribute_cache = AttributeCache()
        SessionConfiguration(new_instance=True, in_storage=AttributeCache())

        arguments = {'in_storage': attribute_cache}
        gc_set = DatasetFactory().get_dataset('gridcell',
                                              package='urbansim',
                                              arguments=arguments)

        runs = {
            #r'X:\urbansim_cache\run_1713.2007_01_03_11_16':r'(run 1713 - baseline)',
            #r'X:\urbansim_cache\run_1714.2007_01_03_11_20':r'(run 1714 - no ugb)',
            #r'X:\urbansim_cache\run_1731.2007_01_03_11_16':r'(run 1731 - no build)',
            r'X:\urbansim_cache\run_1847.2007_01_15_15_23':
            r'(run 1847 - no UGB 1/17/2007)',
            r'X:\urbansim_cache\run_1848.2007_01_15_15_40':
            r'(run 1848 - no UGB+1.5xhighway 1/17/2007)',
            #        r'X:\urbansim_cache\run_1849.2007_01_15_16_09':r'(run 1849 - baseline 1/17/2007)',
            r'V:\psrc\run_1850.2007_01_15_17_03':
            r'(run 1850 - baseline 1/17/2007)',
            r'V:\psrc\run_1851.2007_01_15_17_07':
            r'(run 1851 - no build 1/17/2007)'
        }

        #baseline = r'X:\urbansim_cache\run_1713.2007_01_03_11_16'
        baseline = r'V:\psrc\run_1850.2007_01_15_17_03'

        comparison_variables = {
            'gridcell': [
                'urbansim.gridcell.population',
                'urbansim.gridcell.number_of_jobs'
            ],
            'faz': ['urbansim.faz.population', 'urbansim.faz.number_of_jobs'],
        }
        #datasets_to_preload = {
        #                'gridcell':{ 'nchunks':2},
        #                'household':{},
        #                'job':{},
        #                'zone':{},
        #                'faz':{},
        #                'development_type':{},
        #                'development_event_history':{},
        #                'development_constraint':{},
        #                'job_building_type':{},
        #                'urbansim_constant':{},
        #                }

        year = 2025  # note: this overrides the 'year' argument passed to run()

        simulation_state = SimulationState()
        simulation_state.set_current_time(year)

        SessionConfiguration(new_instance=True,
                             package_order=['psrc', 'urbansim', 'opus_core'],
                             in_storage=AttributeCache())

        #cache_storage = AttributeCache().get_flt_storage_for_year(year_for_base_year_cache)
        #datasets = DatasetFactory().create_datasets_from_flt(datasets_to_preload,
        #                                                    "urbansim",
        #                                                    additional_arguments={'in_storage': AttributeCache()})

        variable_augment = False
        if variable_augment:
            for dataset_name in comparison_variables.keys():
                cache_directory = baseline
                simulation_state.set_cache_directory(cache_directory)
                dataset = DatasetFactory().get_dataset(
                    dataset_name,
                    package='urbansim',
                    arguments={'in_storage': AttributeCache()})

                variables = comparison_variables[dataset_name]
                dataset.compute_variables(variables, resources=Resources())
                ids = dataset.get_id_attribute()
                for run in runs.keys():
                    cache_directory = run
                    simulation_state.set_cache_directory(cache_directory)
                    run_dataset = DatasetFactory().get_dataset(
                        dataset_name,
                        package='urbansim',
                        arguments={'in_storage': AttributeCache()})
                    match_index = run_dataset.get_id_index(ids)
                    for variable in variables:
                        short_name = VariableName(variable).alias()
                        attribute = dataset.get_attribute(short_name)
                        run_dataset.add_attribute(attribute[match_index],
                                                  'baseline_' + short_name,
                                                  metadata=1)
                        run_dataset.flush_attribute('baseline_' + short_name)

                    SessionConfiguration().get_dataset_pool(
                    ).remove_all_datasets()

        #indicators_module = args[0]
        #eval("from %s import config" % indicators_module)
        from make_indicators_openev import config
        from urbansim.indicators.indicator_configuration_handler_batch_mode import generate_indicators
        #from make_indicators_openev import config
        #from inprocess.travis.urbansim.indicators.indicator_configuration_handler_batch_mode import generate_indicators
        for run, description in runs.iteritems():
            config.request_years = [year]
            config.cache_directory = run
            config.run_description = description
            generate_indicators(config)