def _convert_lccm_input(self, flt_directory_in, flt_directory_out):
    """Write the unmasked land cover cells to a new flt directory.

    Opens the land cover dataset stored in ``flt_directory_in``, selects the
    cells whose mask value equals 0 and writes them, one attribute at a
    time, to table "land_covers" in ``flt_directory_out``.  Every attribute
    is deleted from the full dataset right after it has been written.

    flt_directory_in: location of the flt storage to read from.
    flt_directory_out: location of the flt storage to write to.
    """
    gc.collect()
    started = time()

    source_storage = StorageFactory().get_storage(
        'flt_storage', storage_location=flt_directory_in)
    target_storage = StorageFactory().get_storage(
        'flt_storage', storage_location=flt_directory_out)
    lc = LandCoverDataset(in_storage=source_storage,
                          out_storage=target_storage)
    # lc.get_header() # added 23 june 2009 by mm

    # Keep only the cells with mask value 0.
    unmasked = where(lc.get_mask() == 0)[0]
    lcsubset = DatasetSubset(lc, unmasked)

    print("Converting:")
    # The two coordinate attributes are converted first, followed by a flush.
    for coordinate in ("relative_x", "relative_y"):
        lcsubset.write_dataset(attributes=[coordinate],
                               out_table_name="land_covers")
        lc.delete_one_attribute(coordinate)
    lc.flush_dataset()
    gc.collect()

    # NOTE(review): attributes are deleted while looping over the result of
    # get_primary_attribute_names(); this presumably relies on that call
    # returning a copy rather than the dataset's live list -- confirm.
    for attr in lc.get_primary_attribute_names():
        print("%s %s" % (" ", attr))
        lcsubset.write_dataset(attributes=[attr],
                               out_table_name="land_covers")
        lc.delete_one_attribute(attr)

    elapsed = time() - started
    logger.log_status("Data conversion done. " + str(elapsed) + " s")
def _copy_invariants_to_temp_land_cover_dir(self, land_cover_path):
    """Copy the primary land cover attributes into the temp directory.

    Opens the land cover dataset found at ``land_cover_path`` with
    ``self.temp_land_cover_dir`` as its output storage, flushes it and then
    writes its primary attributes to table 'land_covers'.

    land_cover_path: location of the flt storage to read from.
    """
    logger.log_status("temp input land cover data in " + self.temp_land_cover_dir)

    source_storage = StorageFactory().get_storage(
        "flt_storage", storage_location=land_cover_path)
    temp_storage = StorageFactory().get_storage(
        "flt_storage", storage_location=self.temp_land_cover_dir)
    land_covers = LandCoverDataset(in_storage=source_storage,
                                   out_storage=temp_storage,
                                   out_table_name='land_covers',
                                   debuglevel=4)

    logger.log_status("Land cover dataset created.... ")  # added dec 4, 2009
    land_covers.flush_dataset()  # added dec 4, 2009
    land_covers.write_dataset(attributes=AttributeType.PRIMARY)
def run(self, base_directory, urbansim_cache_directory, years):
    """Run the land cover change model once for each year in ``years``.

    base_directory: directory holding the lccm land cover data.  It is used
        as-is for every year (the per-year sub-folder lookup is commented
        out below).
    urbansim_cache_directory: directory that contains all the year folders
        of the urbansim cache.
    years: list of years to run.

    Coefficients and specification are read from the tables
    "land_cover_change_model_coefficients" and
    "land_cover_change_model_specification" in the package's 'data'
    directory.  Each year the land cover dataset is restricted to the
    indices 0..99999 before the model runs.
    """
    lccm = LandCoverChangeModel(self.possible_lcts,
                                submodel_string=self.lct_attribute,
                                choice_attribute_name=self.lct_attribute,
                                debuglevel=4)

    coefficients = Coefficients()
    data_dir = os.path.join(self.package_path, 'data')
    tab_storage = StorageFactory().get_storage('tab_storage',
                                               storage_location=data_dir)
    coefficients.load(in_storage=tab_storage,
                      in_table_name="land_cover_change_model_coefficients")

    specification = EquationSpecification(in_storage=tab_storage)
    specification.load(in_table_name="land_cover_change_model_specification")
    specification.set_variable_prefix("biocomplexity.land_cover.")

    constants = Constants()
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(urbansim_cache_directory)
    attribute_cache = AttributeCache()

    # The same fixed index range is applied to the dataset of every year.
    index = arange(100000)
    for year in years:
        simulation_state.set_current_time(year)

        #land_cover_path = os.path.join(base_directory, str(year))
        land_cover_path = base_directory
        input_storage = StorageFactory().get_storage(
            'flt_storage', storage_location=land_cover_path)
        output_storage = StorageFactory().get_storage(
            'flt_storage', storage_location=land_cover_path)
        land_covers = LandCoverDataset(in_storage=input_storage,
                                       out_storage=output_storage,
                                       debuglevel=4)
        land_covers.subset_by_index(index)
        #land_covers.load_dataset()

        gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4)

        agents_index = None  # not referenced below
        data_objects = {
            "gridcell": gridcells,
            "constants": constants,
            "flush_variables": True,
        }
        lccm.run(specification, coefficients, land_covers,
                 data_objects=data_objects,
                 chunk_specification={'nchunks': 1})
        land_covers.flush_dataset()

        # Drop every reference to this year's datasets before the next year.
        del data_objects
        del input_storage
        del output_storage
        del gridcells
        del land_covers
def _convert_lccm_input(self, flt_directory_in, flt_directory_out):
    """Write the unmasked land cover cells to a new flt directory.

    Opens the land cover dataset stored in ``flt_directory_in``, selects the
    cells whose mask value equals 0 and writes them, one attribute at a
    time, to table "land_covers" in ``flt_directory_out``.  Every attribute
    is deleted from the full dataset right after it has been written.

    flt_directory_in: location of the flt storage to read from.
    flt_directory_out: location of the flt storage to write to.
    """
    gc.collect()
    started = time()

    source_storage = StorageFactory().get_storage(
        'flt_storage', storage_location=flt_directory_in)
    target_storage = StorageFactory().get_storage(
        'flt_storage', storage_location=flt_directory_out)
    lc = LandCoverDataset(in_storage=source_storage,
                          out_storage=target_storage)
    # lc.get_header() # added 23 june 2009 by mm

    # Keep only the cells with mask value 0.
    unmasked = where(lc.get_mask() == 0)[0]
    lcsubset = DatasetSubset(lc, unmasked)

    print("Converting:")
    # The two coordinate attributes are converted first, followed by a flush.
    for coordinate in ("relative_x", "relative_y"):
        lcsubset.write_dataset(attributes=[coordinate],
                               out_table_name="land_covers")
        lc.delete_one_attribute(coordinate)
    lc.flush_dataset()
    gc.collect()

    # NOTE(review): attributes are deleted while looping over the result of
    # get_primary_attribute_names(); this presumably relies on that call
    # returning a copy rather than the dataset's live list -- confirm.
    for attr in lc.get_primary_attribute_names():
        print("%s %s" % (" ", attr))
        lcsubset.write_dataset(attributes=[attr],
                               out_table_name="land_covers")
        lc.delete_one_attribute(attr)

    elapsed = time() - started
    logger.log_status("Data conversion done. " + str(elapsed) + " s")
def run(self, base_directory, urbansim_cache_directory, years):
    """Run the land cover change model once for each year in ``years``.

    base_directory: directory holding the lccm land cover data.  It is used
        as-is for every year (the per-year sub-folder lookup is commented
        out below).
    urbansim_cache_directory: directory that contains all the year folders
        of the urbansim cache.
    years: list of years to run.

    Coefficients and specification are read from the tables
    "land_cover_change_model_coefficients" and
    "land_cover_change_model_specification" in the package's 'data'
    directory.  Each year the land cover dataset is restricted to the
    indices 0..99999 before the model runs.
    """
    lccm = LandCoverChangeModel(self.possible_lcts,
                                submodel_string=self.lct_attribute,
                                choice_attribute_name=self.lct_attribute,
                                debuglevel=4)

    coefficients = Coefficients()
    data_dir = os.path.join(self.package_path, 'data')
    tab_storage = StorageFactory().get_storage('tab_storage',
                                               storage_location=data_dir)
    coefficients.load(in_storage=tab_storage,
                      in_table_name="land_cover_change_model_coefficients")

    specification = EquationSpecification(in_storage=tab_storage)
    specification.load(in_table_name="land_cover_change_model_specification")
    specification.set_variable_prefix("biocomplexity.land_cover.")

    constants = Constants()
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(urbansim_cache_directory)
    attribute_cache = AttributeCache()

    # The same fixed index range is applied to the dataset of every year.
    index = arange(100000)
    for year in years:
        simulation_state.set_current_time(year)

        #land_cover_path = os.path.join(base_directory, str(year))
        land_cover_path = base_directory
        input_storage = StorageFactory().get_storage(
            'flt_storage', storage_location=land_cover_path)
        output_storage = StorageFactory().get_storage(
            'flt_storage', storage_location=land_cover_path)
        land_covers = LandCoverDataset(in_storage=input_storage,
                                       out_storage=output_storage,
                                       debuglevel=4)
        land_covers.subset_by_index(index)
        #land_covers.load_dataset()

        gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4)

        agents_index = None  # not referenced below
        data_objects = {
            "gridcell": gridcells,
            "constants": constants,
            "flush_variables": True,
        }
        lccm.run(specification, coefficients, land_covers,
                 data_objects=data_objects,
                 chunk_specification={'nchunks': 1})
        land_covers.flush_dataset()

        # Drop every reference to this year's datasets before the next year.
        del data_objects
        del input_storage
        del output_storage
        del gridcells
        del land_covers
class LCCMEstimator(Estimator):
    """Estimate and calibrate the land cover change model (LCCM).

    The input data locations, the pair of years and the sampling attributes
    are hard-coded; alternative choices are kept as commented-out lines in
    ``__init__`` and ``estimate`` and are selected by (un)commenting them.
    """

    def __init__(self, **kargs):
        """Open the land cover datasets used for estimation and calibration.

        ``kargs`` is accepted but not used: ``Estimator.__init__`` is
        deliberately not called (see the commented-out calls below).

        Sets ``self.storage`` (tab storage in the biocomplexity 'data'
        directory), ``self.lc1``/``self.lc2`` (sampled data for the first and
        second year) and ``self.lc1_all``/``self.lc2_all`` (full-extent data
        for the same years, flushed right after creation).
        """
#        Estimator.__init__(self, settings=None, run_land_price_model_before_estimation=False, **kargs) # <-- old __init__
#        Estimator.__init__(self, config=None, save_estimation_results=True) # <-- new __init__ doesn't work, but not needed

        parent_dir_path = package().get_package_parent_path()
        package_path = OpusPackage().get_path_for_package("biocomplexity")
        self.storage = StorageFactory().get_storage(
            'tab_storage', storage_location=os.path.join(package_path, 'data'))

        ## 1. directory path of full (4 county spatial extent) dataset
        flt_directory = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_4County")

        ## 2. select (uncomment) one of the following directory paths of subsetted sample input data/variables
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_small_test_set_opus")
        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all")
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all_orig")
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_suburban_orig")
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban")
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban_orig")

        ## note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
        ## note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999

        ## 3. select (uncomment) one of the following land cover data (input data) date pairs (years)
#        years = [1991, 1995]
        years = [1995, 1999]
#        years = [1999, 2002]

        def open_land_cover(directory, year):
            # One flt-backed land cover dataset for <directory>/<year>.
            location = os.path.join(directory, str(year))
            return LandCoverDataset(
                in_storage=StorageFactory().get_storage("flt_storage", storage_location=location),
                resources=Resources({"lowercase": 1}))

        self.lc1 = open_land_cover(flt_directory_est, years[0])
        self.lc2 = open_land_cover(flt_directory_est, years[1])

        self.lc1_all = open_land_cover(flt_directory, years[0])
        self.lc1_all.flush_dataset()
        self.lc2_all = open_land_cover(flt_directory, years[1])
        self.lc2_all.flush_dataset()

    def estimate(self, spec_py=None, spec_var=None, spec_file=None):
        """Estimate on the sampled data, calibrate on the full data, save results.

        The specification is taken from the first of these that is given:

        spec_py: a module with a ``specification`` attribute; the module is
            reloaded first.  Its name, minus the first 30 characters, becomes
            the suffix of the output table names.
        spec_var: a specification dictionary (see
            ``load_specification_from_variable``).
        spec_file: name of a table in ``self.storage`` to load the
            specification from.

        The calibrated specification and coefficients are written to
        ``self.storage`` as 'lccm_specification_<suffix>c' and
        'lccm_coefficients_<suffix>c'.  Without ``spec_py`` the suffix is
        empty (this case used to fail with an AttributeError after the whole
        estimation had run).

        Raises ValueError if no specification is supplied at all, or if the
        specification dictionary is malformed.
        """
        t1 = time()
        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification
        if spec_var is not None:
            self.specification = self.load_specification_from_variable(spec_var)[0]
        elif spec_file is not None:
            self.specification = EquationSpecification(in_storage=self.storage)
            self.specification.load(in_table_name=spec_file)
        else:
            # Fail before any expensive work instead of on a missing attribute.
            raise ValueError("No specification given: pass spec_py, spec_var or spec_file.")

        self.specification.set_dataset_name_of_variables("land_cover")

        self.model_name = "land_cover_change_model"
        choices = range(1, 15)
        lccm = LandCoverChangeModel(choices, submodel_string="lct")

        ## 4. select (uncomment) one of the following subsetted sampling files (agents_index)
#        agents_index = where(self.lc1.get_attribute("sall_91_95_0"))[0]
        agents_index = where(self.lc1.get_attribute("sall_95_99_0"))[0]
#        agents_index = where(self.lc1.get_attribute("sall_99_02_0b"))[0]
#        agents_index = where(self.lc1.get_attribute("sa9902_9195_0"))[0]
#        agents_index = where(self.lc1.get_attribute("sa9902_9599_0"))[0]
#        agents_index = where(self.lc1.get_attribute("suburb91_95sample0"))[0]
#        agents_index = where(self.lc1.get_attribute("suburb95_99sample0"))[0]
#        agents_index = where(self.lc1.get_attribute("up91x95_old_samp0"))[0]
#        agents_index = where(self.lc1.get_attribute("urbsamp95_99_0"))[0]

        ## agents_index_all is needed separately for the calibration portion:
        ## when using the dataset at the full extent it is created from the
        ## lc1_all agents set and matches the size of that input data

        ## 5. select (uncomment) one of the following sampling files (agents_index) at full spatial extent
#        agents_index_all = where(self.lc1_all.get_attribute("sall_91_95_0"))[0]
        agents_index_all = where(self.lc1_all.get_attribute("sall_95_99_0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("sall_99_02_0b"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("sa9902_9195_0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("sa9902_9599_0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("suburb91_95sample0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("suburb95_99sample0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("up91x95_old_samp0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("urbsamp95_99_0"))[0]

        # The return value of estimate() is not used here; what gets saved is
        # the result of calibrate() and the model's specification.
        lccm.estimate(self.specification, self.lc1, self.lc2,
                      agents_index=agents_index, debuglevel=4)
        new_coef = lccm.calibrate(self.lc1_all, self.lc2_all, agents_index_all)
        specification = lccm.specification

        # save estimation results
#        out_suffix = spec_py.__name__[len(spec_py.__name__) - 11:]
        # NOTE(review): the slice at 30 presumably strips a fixed-length
        # package prefix from the module name -- confirm against the callers.
        if spec_py is not None:
            out_suffix = spec_py.__name__[30:]
        else:
            out_suffix = ""
        specification.write(out_storage=self.storage,
                            out_table_name='lccm_specification_%sc' % out_suffix)
        new_coef.write(out_storage=self.storage,
                       out_table_name='lccm_coefficients_%sc' % out_suffix)

        logger.log_status("Estimation done. %s s" % str(time() - t1))

    def load_specification_from_variable(self, spec_var):
        """Flatten a specification dictionary into an EquationSpecification.

        spec_var: dictionary mapping a submodel to a dictionary of
            ``{variable: sequence of coefficients}``.  A submodel dictionary
            may hold an "equation_ids" entry giving the equation id for each
            coefficient position; when it is missing or empty the ids are
            1..n, where n is the coefficient count of the first variable
            visited in that submodel.  Coefficients equal to 0 are skipped.

        ``spec_var`` is not modified.

        Returns the tuple (specification, variables, coefficients, equations,
        submodels), the last four being parallel lists.

        Raises ValueError if the dictionary does not have this shape.
        """
        variables = []
        coefficients = []
        equations = []
        submodels = []
        try:
            for sub_model, submodel_spec in spec_var.items():
                if not isinstance(submodel_spec, dict):
                    raise ValueError("Wrong specification format")
                ## this retrieves eq_ids from spec.py - they're stored in equations
                ## then passed to the equation specifications
                equation_ids = submodel_spec.get("equation_ids")
                for var, coefs in submodel_spec.items():
                    if var == "equation_ids":
                        # Skipped rather than deleted, so the caller's
                        # dictionary stays intact for later calls.
                        continue
                    if not equation_ids:
                        equation_ids = range(1, len(coefs) + 1)
                    for i, coef in enumerate(coefs):
                        if coef != 0:
                            variables.append(var)
                            coefficients.append(coef)
                            equations.append(equation_ids[i])
                            submodels.append(sub_model)
        except (AttributeError, TypeError, IndexError, KeyError):
            # Only structural problems are translated; programming errors and
            # interrupts are no longer hidden behind this message.
            raise ValueError("Wrong specification format for submodel variable.")

        specification = EquationSpecification(variables=variables,
                                              coefficients=coefficients,
                                              equations=equations,
                                              submodels=submodels)
        return (specification, variables, coefficients, equations, submodels)
class LCCMEstimator(Estimator):
    """Estimate and calibrate the land cover change model (LCCM).

    The input data locations, the pair of years and the sampling attributes
    are hard-coded; alternative choices are kept as commented-out lines in
    ``__init__`` and ``estimate`` and are selected by (un)commenting them.
    """

    def __init__(self, **kargs):
        """Open the land cover datasets used for estimation and calibration.

        ``kargs`` is accepted but not used: ``Estimator.__init__`` is
        deliberately not called (see the commented-out calls below).

        Sets ``self.storage`` (tab storage in the biocomplexity 'data'
        directory), ``self.lc1``/``self.lc2`` (sampled data for the first and
        second year) and ``self.lc1_all``/``self.lc2_all`` (full-extent data
        for the same years, flushed right after creation).
        """
#        Estimator.__init__(self, settings=None, run_land_price_model_before_estimation=False, **kargs) # <-- old __init__
#        Estimator.__init__(self, config=None, save_estimation_results=True) # <-- new __init__ doesn't work, but not needed

        parent_dir_path = package().get_package_parent_path()
        package_path = OpusPackage().get_path_for_package("biocomplexity")
        self.storage = StorageFactory().get_storage(
            'tab_storage', storage_location=os.path.join(package_path, 'data'))

        ## 1. directory path of full (4 county spatial extent) dataset
        flt_directory = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_4County")

        ## 2. select (uncomment) one of the following directory paths of subsetted sample input data/variables
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_small_test_set_opus")
        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all")
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all_orig")
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_suburban_orig")
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban")
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban_orig")

        ## note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
        ## note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999

        ## 3. select (uncomment) one of the following land cover data (input data) date pairs (years)
#        years = [1991, 1995]
        years = [1995, 1999]
#        years = [1999, 2002]

        def open_land_cover(directory, year):
            # One flt-backed land cover dataset for <directory>/<year>.
            location = os.path.join(directory, str(year))
            return LandCoverDataset(
                in_storage=StorageFactory().get_storage("flt_storage", storage_location=location),
                resources=Resources({"lowercase": 1}))

        self.lc1 = open_land_cover(flt_directory_est, years[0])
        self.lc2 = open_land_cover(flt_directory_est, years[1])

        self.lc1_all = open_land_cover(flt_directory, years[0])
        self.lc1_all.flush_dataset()
        self.lc2_all = open_land_cover(flt_directory, years[1])
        self.lc2_all.flush_dataset()

    def estimate(self, spec_py=None, spec_var=None, spec_file=None):
        """Estimate on the sampled data, calibrate on the full data, save results.

        The specification is taken from the first of these that is given:

        spec_py: a module with a ``specification`` attribute; the module is
            reloaded first.  Its name, minus the first 30 characters, becomes
            the suffix of the output table names.
        spec_var: a specification dictionary (see
            ``load_specification_from_variable``).
        spec_file: name of a table in ``self.storage`` to load the
            specification from.

        The calibrated specification and coefficients are written to
        ``self.storage`` as 'lccm_specification_<suffix>c' and
        'lccm_coefficients_<suffix>c'.  Without ``spec_py`` the suffix is
        empty (this case used to fail with an AttributeError after the whole
        estimation had run).

        Raises ValueError if no specification is supplied at all, or if the
        specification dictionary is malformed.
        """
        t1 = time()
        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification
        if spec_var is not None:
            self.specification = self.load_specification_from_variable(spec_var)[0]
        elif spec_file is not None:
            self.specification = EquationSpecification(in_storage=self.storage)
            self.specification.load(in_table_name=spec_file)
        else:
            # Fail before any expensive work instead of on a missing attribute.
            raise ValueError("No specification given: pass spec_py, spec_var or spec_file.")

        self.specification.set_dataset_name_of_variables("land_cover")

        self.model_name = "land_cover_change_model"
        choices = range(1, 15)
        lccm = LandCoverChangeModel(choices, submodel_string="lct")

        ## 4. select (uncomment) one of the following subsetted sampling files (agents_index)
#        agents_index = where(self.lc1.get_attribute("sall_91_95_0"))[0]
        agents_index = where(self.lc1.get_attribute("sall_95_99_0"))[0]
#        agents_index = where(self.lc1.get_attribute("sall_99_02_0b"))[0]
#        agents_index = where(self.lc1.get_attribute("sa9902_9195_0"))[0]
#        agents_index = where(self.lc1.get_attribute("sa9902_9599_0"))[0]
#        agents_index = where(self.lc1.get_attribute("suburb91_95sample0"))[0]
#        agents_index = where(self.lc1.get_attribute("suburb95_99sample0"))[0]
#        agents_index = where(self.lc1.get_attribute("up91x95_old_samp0"))[0]
#        agents_index = where(self.lc1.get_attribute("urbsamp95_99_0"))[0]

        ## agents_index_all is needed separately for the calibration portion:
        ## when using the dataset at the full extent it is created from the
        ## lc1_all agents set and matches the size of that input data

        ## 5. select (uncomment) one of the following sampling files (agents_index) at full spatial extent
#        agents_index_all = where(self.lc1_all.get_attribute("sall_91_95_0"))[0]
        agents_index_all = where(self.lc1_all.get_attribute("sall_95_99_0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("sall_99_02_0b"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("sa9902_9195_0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("sa9902_9599_0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("suburb91_95sample0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("suburb95_99sample0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("up91x95_old_samp0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("urbsamp95_99_0"))[0]

        # The return value of estimate() is not used here; what gets saved is
        # the result of calibrate() and the model's specification.
        lccm.estimate(self.specification, self.lc1, self.lc2,
                      agents_index=agents_index, debuglevel=4)
        new_coef = lccm.calibrate(self.lc1_all, self.lc2_all, agents_index_all)
        specification = lccm.specification

        # save estimation results
#        out_suffix = spec_py.__name__[len(spec_py.__name__) - 11:]
        # NOTE(review): the slice at 30 presumably strips a fixed-length
        # package prefix from the module name -- confirm against the callers.
        if spec_py is not None:
            out_suffix = spec_py.__name__[30:]
        else:
            out_suffix = ""
        specification.write(out_storage=self.storage,
                            out_table_name='lccm_specification_%sc' % out_suffix)
        new_coef.write(out_storage=self.storage,
                       out_table_name='lccm_coefficients_%sc' % out_suffix)

        logger.log_status("Estimation done. %s s" % str(time() - t1))

    def load_specification_from_variable(self, spec_var):
        """Flatten a specification dictionary into an EquationSpecification.

        spec_var: dictionary mapping a submodel to a dictionary of
            ``{variable: sequence of coefficients}``.  A submodel dictionary
            may hold an "equation_ids" entry giving the equation id for each
            coefficient position; when it is missing or empty the ids are
            1..n, where n is the coefficient count of the first variable
            visited in that submodel.  Coefficients equal to 0 are skipped.

        ``spec_var`` is not modified.

        Returns the tuple (specification, variables, coefficients, equations,
        submodels), the last four being parallel lists.

        Raises ValueError if the dictionary does not have this shape.
        """
        variables = []
        coefficients = []
        equations = []
        submodels = []
        try:
            for sub_model, submodel_spec in spec_var.items():
                if not isinstance(submodel_spec, dict):
                    raise ValueError("Wrong specification format")
                ## this retrieves eq_ids from spec.py - they're stored in equations
                ## then passed to the equation specifications
                equation_ids = submodel_spec.get("equation_ids")
                for var, coefs in submodel_spec.items():
                    if var == "equation_ids":
                        # Skipped rather than deleted, so the caller's
                        # dictionary stays intact for later calls.
                        continue
                    if not equation_ids:
                        equation_ids = range(1, len(coefs) + 1)
                    for i, coef in enumerate(coefs):
                        if coef != 0:
                            variables.append(var)
                            coefficients.append(coef)
                            equations.append(equation_ids[i])
                            submodels.append(sub_model)
        except (AttributeError, TypeError, IndexError, KeyError):
            # Only structural problems are translated; programming errors and
            # interrupts are no longer hidden behind this message.
            raise ValueError("Wrong specification format for submodel variable.")

        specification = EquationSpecification(variables=variables,
                                              coefficients=coefficients,
                                              equations=equations,
                                              submodels=submodels)
        return (specification, variables, coefficients, equations, submodels)
def run(self, base_directory, urbansim_cache_directory, years, output_directory,
        temp_folder, coefficients_name, specification_name,
        convert_flt=True, convert_input=False):
    """Run the land cover change model for each year, one index window at a time.

    base_directory: directory that contains all the year folders of lccm;
        passed on to ``_generate_input_land_cover``.
    urbansim_cache_directory: directory that contains all the year folders
        of the urbansim cache.
    years: list of years to run.
    output_directory: passed on to ``_generate_input_land_cover`` and
        ``_generate_output_flt2``.
    temp_folder: directory to use as ``self.temp_land_cover_dir``; when None
        a new directory is created with ``tempfile.mkdtemp()``.  NOTE: the
        directory is removed when the run ends, including when it was
        supplied by the caller.
    coefficients_name: table (in the package's 'data' directory) to load the
        coefficients from.
    specification_name: table (same storage) to load the specification from.
    convert_flt: passed on to ``_generate_input_land_cover`` and
        ``_generate_output_flt2``.
    convert_input: passed on to ``_generate_input_land_cover``.

    For every year the land cover cells are processed in windows of at most
    ``LccmConfiguration.offset`` indices; successive windows overlap by
    ``10 * LccmConfiguration.ncols`` indices.

    The temp directory is removed whether the run succeeds or raises.
    """
    model = LandCoverChangeModel(self.possible_lcts,
                                 submodel_string=self.lct_attribute,
                                 choice_attribute_name=self.lct_attribute,
                                 debuglevel=4)
    coefficients = Coefficients()
    storage = StorageFactory().get_storage(
        'tab_storage', storage_location=os.path.join(self.package_path, 'data'))
    coefficients.load(in_storage=storage, in_table_name=coefficients_name)
    specification = EquationSpecification(in_storage=storage)
    specification.load(in_table_name=specification_name)
    specification.set_variable_prefix("biocomplexity.land_cover.")
    constants = Constants()
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(urbansim_cache_directory)
    attribute_cache = AttributeCache()
    SessionConfiguration(new_instance=True,
                         package_order=['biocomplexity', 'urbansim', 'opus_core'],
                         in_storage=AttributeCache())

    ncols = LccmConfiguration.ncols

    if temp_folder is None:
        self.temp_land_cover_dir = tempfile.mkdtemp()
    else:
        self.temp_land_cover_dir = temp_folder

    finished = False
    try:
        for year in years:
            land_cover_path = self._generate_input_land_cover(
                year, base_directory, urbansim_cache_directory, years,
                output_directory, convert_flt, convert_input)

            #max_size = 174338406 (orig) - act. int: 19019944 (37632028 incl NoData)
            # 1st instance of lc_dataset - but looks like a 'lite' version
            max_size = self._get_max_index(land_cover_path)
            offset = min(LccmConfiguration.offset, max_size)
            s = 0
            t = offset
            # NOTE(review): if offset == 10*ncols the window below does not
            # advance (s and t keep their values) -- confirm the
            # configuration rules this out.
            while (s < t and t <= max_size):
                logger.log_status("Offset: ", s, t)
                index = arange(s, t)

                land_cover_cache_path = os.path.join(
                    urbansim_cache_directory, str(year), 'land_covers')
                self._clean_up_land_cover_cache(land_cover_cache_path)

                simulation_state.set_current_time(year)

                # 2nd instance of lc_dataset
                land_covers = LandCoverDataset(
                    in_storage=StorageFactory().get_storage(
                        'flt_storage', storage_location=land_cover_path),
                    out_storage=StorageFactory().get_storage(
                        'flt_storage', storage_location=land_cover_path),
                    debuglevel=4)
                land_covers.subset_by_index(index)
#                land_covers.load_dataset()
                gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4)

                model.run(specification, coefficients, land_covers,
                          data_objects={"gridcell": gridcells,
                                        "constants": constants,
                                        "flush_variables": True},
                          chunk_specification={'nchunks': 5})  ## chunk size set here
                land_covers.flush_dataset()
                # Release this window's datasets before the output is generated.
                del gridcells
                del land_covers

#                self._generate_output_flt(year, urbansim_cache_directory, output_directory, convert_flt)
                self._generate_output_flt2(year, urbansim_cache_directory,
                                           output_directory, convert_flt)

                if t >= max_size:
                    break
                # Step to the next window, overlapping the previous one by
                # 10*ncols indices (presumably 10 grid rows -- TODO confirm).
                s = max(t - 10 * ncols, s)
                t = min(t + offset - 10 * ncols, max_size)
        finished = True
    finally:
        # clean up temp storage after done simulation.  After a failure the
        # cleanup is best-effort so that it cannot hide the original error;
        # after a successful run cleanup errors are raised as before.
        shutil.rmtree(self.temp_land_cover_dir, ignore_errors=not finished)