def __init__(self, **kargs):
    """Open the storages and land cover datasets used by this estimator.

    After construction the instance has:
      self.storage  -- tab storage on the ``data`` directory of the
                       biocomplexity package
      self.lc1, self.lc2         -- land covers of the estimation sample for
                                    the first and second year of the date pair
      self.lc1_all, self.lc2_all -- land covers of the full (4 county spatial
                                    extent) dataset for the same two years;
                                    each is flushed right after it is created

    ``**kargs`` is accepted but not used.  The parent constructor is not
    called:
      Estimator.__init__(self, settings=None,
                         run_land_price_model_before_estimation=False,
                         **kargs)                       # <-- old __init__
      Estimator.__init__(self, config=None,
                         save_estimation_results=True)  # <-- new __init__
                                                        #     doesn't work, but not needed
    """
    parent_dir_path = package().get_package_parent_path()
    package_path = OpusPackage().get_path_for_package("biocomplexity")
    data_root = os.path.join(parent_dir_path, "biocomplexity", "data")

    self.storage = StorageFactory().get_storage(
        'tab_storage',
        storage_location=os.path.join(package_path, 'data'))

    ## 1. directory path of full (4 county spatial extent) dataset
    full_extent_dir = os.path.join(data_root, "LCCM_4County")

    ## 2. directory path of the subsetted sample input data/variables.
    ##    Replace the last path component with one of these to switch sample:
    ##      "LCCM_small_test_set_opus"
    ##      "data_for_estimation_all"        (current choice)
    ##      "data_for_estimation_all_orig"
    ##      "data_for_suburban_orig"
    ##      "data_for_urban"
    ##      "data_for_urban_orig"
    sample_dir = os.path.join(data_root, "data_for_estimation_all")

    ## note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
    ## note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999

    ## 3. land cover data (input data) date pair (years).
    ##    Alternatives: [1991, 1995] or [1999, 2002]
    years = [1995, 1999]

    def open_land_cover(directory, year):
        # One flt storage per <directory>/<year> folder.
        year_storage = StorageFactory().get_storage(
            "flt_storage",
            storage_location=os.path.join(directory, str(year)))
        return LandCoverDataset(in_storage=year_storage,
                                resources=Resources({"lowercase": 1}))

    self.lc1 = open_land_cover(sample_dir, years[0])
    self.lc2 = open_land_cover(sample_dir, years[1])

    self.lc1_all = open_land_cover(full_extent_dir, years[0])
    self.lc1_all.flush_dataset()
    self.lc2_all = open_land_cover(full_extent_dir, years[1])
    self.lc2_all.flush_dataset()
def _convert_lccm_input(self, flt_directory_in, flt_directory_out): gc.collect() t1 = time() lc = LandCoverDataset(in_storage=StorageFactory().get_storage( 'flt_storage', storage_location=flt_directory_in), out_storage=StorageFactory().get_storage( 'flt_storage', storage_location=flt_directory_out)) # lc.get_header() # added 23 june 2009 by mm mask = lc.get_mask() idx = where(mask == 0)[0] lcsubset = DatasetSubset(lc, idx) print "Converting:" lcsubset.write_dataset(attributes=["relative_x"], out_table_name="land_covers") lc.delete_one_attribute("relative_x") lcsubset.write_dataset(attributes=["relative_y"], out_table_name="land_covers") lc.delete_one_attribute("relative_y") lc.flush_dataset() gc.collect() # lc_names = lc.get_primary_attribute_names() for attr in lc.get_primary_attribute_names(): print " ", attr lcsubset.write_dataset(attributes=[attr], out_table_name="land_covers") lc.delete_one_attribute(attr) logger.log_status("Data conversion done. " + str(time() - t1) + " s")
def _copy_invariants_to_temp_land_cover_dir(self, land_cover_path):
    """Write the primary land cover attributes into the temp directory.

    Reads the land cover dataset stored at ``land_cover_path`` and writes its
    primary attributes to table 'land_covers' in ``self.temp_land_cover_dir``.
    """
    logger.log_status("temp input land cover data in " +
                      self.temp_land_cover_dir)

    source_storage = StorageFactory().get_storage(
        "flt_storage", storage_location=land_cover_path)
    target_storage = StorageFactory().get_storage(
        "flt_storage", storage_location=self.temp_land_cover_dir)
    land_covers = LandCoverDataset(in_storage=source_storage,
                                   out_storage=target_storage,
                                   out_table_name='land_covers',
                                   debuglevel=4)

    logger.log_status("Land cover dataset created.... ")  # added dec 4, 2009
    land_covers.flush_dataset()  # added dec 4, 2009
    land_covers.write_dataset(attributes=AttributeType.PRIMARY)
def run(self, base_directory, urbansim_cache_directory, years):
    """Run the land cover change model once per year.

    base_directory: directory holding the lccm land cover data.  The same
        directory is used for every year (the per-year sub-folder lookup is
        disabled), both as input and as output storage.
    urbansim_cache_directory: directory contains all years folder of
        urbansim cache.
    years: lists of year to run.

    Coefficients and specification are loaded from the tables
    "land_cover_change_model_coefficients" and
    "land_cover_change_model_specification" in <self.package_path>/data.
    Only the first 100000 land cover entries are simulated.
    """
    model = LandCoverChangeModel(self.possible_lcts,
                                 submodel_string=self.lct_attribute,
                                 choice_attribute_name=self.lct_attribute,
                                 debuglevel=4)

    coefficients = Coefficients()
    package_data_dir = os.path.join(self.package_path, 'data')
    storage = StorageFactory().get_storage('tab_storage',
                                           storage_location=package_data_dir)
    coefficients.load(in_storage=storage,
                      in_table_name="land_cover_change_model_coefficients")

    specification = EquationSpecification(in_storage=storage)
    specification.load(in_table_name="land_cover_change_model_specification")
    specification.set_variable_prefix("biocomplexity.land_cover.")

    constants = Constants()
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(urbansim_cache_directory)
    attribute_cache = AttributeCache()

    index = arange(100000)
    for year in years:
        simulation_state.set_current_time(year)

        # land_cover_path = os.path.join(base_directory, str(year))
        land_cover_path = base_directory
        input_storage = StorageFactory().get_storage(
            'flt_storage', storage_location=land_cover_path)
        output_storage = StorageFactory().get_storage(
            'flt_storage', storage_location=land_cover_path)
        land_covers = LandCoverDataset(in_storage=input_storage,
                                       out_storage=output_storage,
                                       debuglevel=4)
        land_covers.subset_by_index(index)
        # land_covers.load_dataset()

        gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4)

        data_objects = {
            "gridcell": gridcells,
            "constants": constants,
            "flush_variables": True,
        }
        model.run(specification, coefficients, land_covers,
                  data_objects=data_objects,
                  chunk_specification={'nchunks': 1})
        land_covers.flush_dataset()

        # Drop the references before the next year's datasets are created.
        del gridcells
        del land_covers
def compute_computed_variables(self, base_directory, urbansim_cache_directory, years):
    """Compute the urbansim output variables on the first year's land covers.

    Only ``years[0]`` is processed.  The land cover dataset is loaded from
    ``<base_directory>/<year>``, the gridcell dataset from
    ``urbansim_cache_directory``; the variables listed in
    ``self.land_cover_urbansim_output_variables`` are computed and all
    attributes of the land cover dataset are written back to the directory
    they were read from.

    base_directory: directory containing one land cover folder per year.
    urbansim_cache_directory: flt storage location of the gridcell data.
    years: list of years; only the first entry is used.
    """
    # Function-scope import: the pool is only needed by this method.
    from opus_core.datasets.dataset_pool import DatasetPool

    year = years[0]
    land_cover_path = os.path.join(base_directory, str(year))
    land_covers = LandCoverDataset(
        in_storage=StorageFactory().get_storage(
            'flt_storage', storage_location=land_cover_path),
        out_storage=StorageFactory().get_storage(
            'flt_storage', storage_location=land_cover_path))
    land_covers.load_dataset()

    gridcell_path = os.path.join(urbansim_cache_directory)
    gridcell_storage = StorageFactory().get_storage(
        'flt_storage', storage_location=gridcell_path)
    gridcells = GridcellDataset(in_storage=gridcell_storage)
    gridcells.load_dataset()

    # The original passed an undefined name ``dataset_pool`` (NameError).
    # Build the pool from the two datasets loaded above so that variables
    # can look up both of them.
    # NOTE(review): package order and dataset names are assumptions taken
    # from how pools/sessions are set up elsewhere in this package - confirm.
    dataset_pool = DatasetPool(
        package_order=['biocomplexity', 'urbansim', 'opus_core'],
        storage=gridcell_storage)
    dataset_pool._add_dataset('gridcell', gridcells)
    dataset_pool._add_dataset('land_cover', land_covers)

    land_covers.compute_variables(self.land_cover_urbansim_output_variables,
                                  dataset_pool=dataset_pool)
    land_covers.write_dataset(attributes='*')
    # land_covers.flush_dataset()

    del gridcells
    del land_covers
input_year = sys.argv[1] flt_directory_in = options.input flt_directory_out = options.output print flt_directory_out test_flag = options.test_flag shutil.rmtree(flt_directory_out) os.mkdir(flt_directory_out) logger.log_status("Convert input data from ", str(input_year)) lc = LandCoverDataset(in_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_in), out_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_out), debuglevel=4) lc.get_header() # mask = lc.get_mask() # idx = where(mask==0)[0] # lcsubset = DatasetSubset(lc, idx) print "Creating and writing relative_x and relative_y:" lc.write_dataset(attributes=["relative_x"], out_table_name="land_covers", valuetypes=valuetypes) lc.delete_one_attribute("relative_x") lc.write_dataset(attributes=["relative_y"], out_table_name="land_covers", valuetypes=valuetypes) lc.delete_one_attribute("relative_y") print "done."
# Opus/UrbanSim urban simulation software. # Copyright (C) 2005-2009 University of Washington # See opus_core/LICENSE from biocomplexity.datasets.land_cover_dataset import LandCoverDataset from biocomplexity.opus_package_info import package from opus_core.storage_factory import StorageFactory import os parent_dir_path = package().get_package_parent_path() flt_directory = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_small_test_set_opus", "1991") if __name__ == "__main__": lc = LandCoverDataset(in_storage=StorageFactory().get_storage( "flt_storage", storage_location=flt_directory)) lc.summary() for attr in lc.get_attribute_names(): print attr lc.plot_map(attr, main=attr)
def _create_flt_file(self, current_year, flt_directory_in, flt_directory_out):
    """Export land cover attributes as .lf4 files with .hdr headers.

    Reads the land cover dataset from ``flt_directory_in`` and writes into
    ``<flt_directory_out>/land_covers``:
      * "lct" (the only primary attribute exported at the moment);
      * every computed attribute whose name starts with "probs", into the
        sub-directory "computed".
    Missing values are filled with ``self.nodata_values`` and the data are
    converted to float32 before being handed to ``self._create_flt_file2``.

    current_year: only used in the log message.
    flt_directory_in: flt storage location to read from.
    flt_directory_out: parent directory for the exported files.
    """
    logger.log_status("Convert output data for ", str(current_year))

    flt_directory_out = os.path.join(flt_directory_out, 'land_covers')
    if not os.path.exists(flt_directory_out):
        os.makedirs(flt_directory_out)

    lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
        'flt_storage', storage_location=flt_directory_in))
    relative_x = lc.get_attribute("relative_x")
    relative_y = lc.get_attribute("relative_y")
    flat_indices = relative_x * self.ncols * 1.0 + relative_y

    # The offset of 5 * ncols is only applied when there are entries beyond it.
    beyond_offset = flat_indices[5 * self.ncols:]
    if beyond_offset is None or len(beyond_offset) == 0:
        offset = 0
    else:
        offset = 5 * self.ncols

    def export(target_directory, name):
        # Write <name>.lf4 and its header <name>.hdr into target_directory.
        logger.log_status(" ", name)
        values = ma.filled(lc.get_attribute(name),
                           self.nodata_values).astype(float32)
        self._create_flt_file2(
            os.path.join(target_directory, name + ".lf4"),
            values, flat_indices, offset)
        self._create_header(
            os.path.join(target_directory, name + ".hdr"))  # <-- added 26 may 09 by mm

    logger.start_block("Converting")
    try:
        # only output lct now: the first primary attribute that is not a
        # coordinate triggers a single export of "lct", then the loop stops.
        for attr_name in lc.get_primary_attribute_names():
            if attr_name in ["relative_x", "relative_y"]:
                continue
            export(flt_directory_out, "lct")
            break

        lc.load_dataset(attributes='*')
        if lc.get_computed_attribute_names() is not None:
            flt_directory_out = os.path.join(flt_directory_out, "computed")
            if not os.path.exists(flt_directory_out):
                os.makedirs(flt_directory_out)
            for attr_name in lc.get_computed_attribute_names():
                if attr_name not in ["_hidden_id_"] and attr_name[0:5] == "probs":
                    export(flt_directory_out, attr_name)
    finally:
        # lc.flush_dataset() # added 23 jun 2009 - not tested...
        logger.end_block()
#index_attribute = "sa9902_9599_0" #index_attribute = "sall_99_02_0v1" #index_attribute = sys.argv[2] index_attribute = "lc0207_100k_0" # 4. Years - date pair of input data; year is concatenated to flt_directory_in specified in #1 #years = [1991, 1995] #years = [1995, 1999] #years = [2002] #years = sys.argv[3] years = [2007, 2007] lc1 = LandCoverDataset(in_storage=StorageFactory().get_storage( 'flt_storage', storage_location=os.path.join(flt_directory_in, str(years[0]))), out_storage=StorageFactory().get_storage( 'flt_storage', storage_location=os.path.join( flt_directory_out, str(years[0])))) agents_index = where(lc1.get_attribute(index_attribute))[0] lc1subset = DatasetSubset(lc1, agents_index) print "Writing set 1:" for attr in lc1.get_primary_attribute_names(): print " ", attr lc1subset.write_dataset(attributes=[attr], out_table_name="land_covers") lc1.delete_one_attribute( attr ) # leaving this line in causes the processing of every other input data file; commenting it causes memory error lc2 = LandCoverDataset(in_storage=StorageFactory().get_storage(
def _get_max_index(self, land_cover_path):
    """Return the size of the land cover dataset stored at ``land_cover_path``."""
    flt_storage = StorageFactory().get_storage(
        "flt_storage", storage_location=land_cover_path)
    return LandCoverDataset(in_storage=flt_storage).size()
def run(self, base_directory, urbansim_cache_directory, years, output_directory, temp_folder,
        coefficients_name, specification_name, convert_flt=True, convert_input=False):
    """Run the land cover change model for every year, one index window at a time.

    base_directory: directory contains all years folder of lccm.
    urbansim_cache_directory: directory contains all years folder of
        urbansim cache.
    years: lists of year to run.
    output_directory: passed to self._generate_input_land_cover and
        self._generate_output_flt2.
    temp_folder: directory used as self.temp_land_cover_dir; when None a new
        temporary directory is created.  In both cases the directory is
        removed when the run ends, whether it succeeded or raised.
    coefficients_name, specification_name: names of the tables loaded from
        the tab storage in <self.package_path>/data.
    convert_flt: passed to self._generate_input_land_cover and
        self._generate_output_flt2.
    convert_input: passed to self._generate_input_land_cover.
    """
    model = LandCoverChangeModel(self.possible_lcts,
                                 submodel_string=self.lct_attribute,
                                 choice_attribute_name=self.lct_attribute,
                                 debuglevel=4)
    coefficients = Coefficients()
    storage = StorageFactory().get_storage(
        'tab_storage',
        storage_location=os.path.join(self.package_path, 'data'))
    coefficients.load(in_storage=storage, in_table_name=coefficients_name)
    specification = EquationSpecification(in_storage=storage)
    specification.load(in_table_name=specification_name)
    specification.set_variable_prefix("biocomplexity.land_cover.")
    constants = Constants()
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(urbansim_cache_directory)
    attribute_cache = AttributeCache()
    SessionConfiguration(new_instance=True,
                         package_order=['biocomplexity', 'urbansim', 'opus_core'],
                         in_storage=AttributeCache())

    ncols = LccmConfiguration.ncols

    if temp_folder is None:
        self.temp_land_cover_dir = tempfile.mkdtemp()
    else:
        self.temp_land_cover_dir = temp_folder

    try:
        for year in years:
            land_cover_path = self._generate_input_land_cover(
                year, base_directory, urbansim_cache_directory,
                years, output_directory, convert_flt, convert_input)

            # max_size = 174338406 (orig) - act. int: 19019944 (37632028 incl NoData)
            # 1st instance of lc_dataset - but looks like a 'lite' version
            max_size = self._get_max_index(land_cover_path)
            offset = min(LccmConfiguration.offset, max_size)

            # [s, t) is the window of land cover indices simulated in one pass.
            s = 0
            t = offset
            while (s < t and t <= max_size):
                logger.log_status("Offset: ", s, t)
                index = arange(s, t)

                land_cover_cache_path = os.path.join(
                    urbansim_cache_directory, str(year), 'land_covers')
                self._clean_up_land_cover_cache(land_cover_cache_path)

                simulation_state.set_current_time(year)

                # 2nd instance of lc_dataset
                land_covers = LandCoverDataset(
                    in_storage=StorageFactory().get_storage(
                        'flt_storage', storage_location=land_cover_path),
                    out_storage=StorageFactory().get_storage(
                        'flt_storage', storage_location=land_cover_path),
                    debuglevel=4)
                land_covers.subset_by_index(index)
                # land_covers.load_dataset()
                gridcells = GridcellDataset(in_storage=attribute_cache,
                                            debuglevel=4)

                model.run(specification, coefficients, land_covers,
                          data_objects={"gridcell": gridcells,
                                        "constants": constants,
                                        "flush_variables": True},
                          chunk_specification={'nchunks': 5})  ## chunk size set here
                land_covers.flush_dataset()
                del gridcells
                del land_covers

                # self._generate_output_flt(year, urbansim_cache_directory, output_directory, convert_flt)
                self._generate_output_flt2(year, urbansim_cache_directory,
                                           output_directory, convert_flt)

                if t >= max_size:
                    break
                # The next window starts 10 * ncols entries before the end of
                # the current one, so consecutive windows overlap.
                s = max(t - 10 * ncols, s)
                t = min(t + offset - 10 * ncols, max_size)
    finally:
        # clean up temp storage after done simulation - also when a year
        # fails part-way through, so the directory is not left behind.
        if os.path.isdir(self.temp_land_cover_dir):
            shutil.rmtree(self.temp_land_cover_dir)
current_year = sys.argv[1] test_flag = options.test_flag flt_directory_in = options.input flt_directory_out = options.output logger.log_status("Convert output data for ", str(current_year)) #todo: how to get 'land_covers' from dataset? flt_directory_out = os.path.join(flt_directory_out, 'land_covers') if not os.path.exists(flt_directory_out): os.makedirs(flt_directory_out) lc = LandCoverDataset(in_storage=StorageFactory().get_storage( 'flt_storage', storage_location=flt_directory_in)) relative_x = lc.get_attribute("relative_x") relative_y = lc.get_attribute("relative_y") flat_indices = relative_x * ncols * 1.0 + relative_y if flat_indices[5 * ncols:] is None or len(flat_indices[5 * ncols:]) == 0: offset = 0 else: offset = 5 * ncols #if os.path.exists("indices.lf4"): # os.remove("indices.lf4") #flat_indices.tofile("indices.lf4") logger.start_block("Converting") try:
def get_resources(self, data_dictionary, dataset):
    """Create resources for computing a variable.

    data_dictionary: maps a name to its data.  Names listed in
        ``self.datasets`` map to a dictionary of attribute arrays; an id
        array 1..n is added to that dictionary (in place) when the id name
        from ``self.id_names`` is missing and is not a list.  "land_cover"
        and "gridcell" entries are turned into dataset objects backed by a
        dict storage; land covers additionally get ``relative_x`` /
        ``relative_y`` when the data does not provide them.  Every other
        name is merged into the resources unchanged.
    dataset: name of the entry that is also registered under the key
        "dataset", unless it is listed in ``self.interactions``.

    Returns the Resources object, which also carries
    ``check_variables='*'`` and ``debug=4``.
    """

    def dataset_from_dict(dataset_class, table_name, table_data):
        # Store the dictionary as a table and open a dataset on top of it.
        dict_storage = StorageFactory().get_storage('dict_storage')
        dict_storage.write_table(
            table_name=table_name,
            table_data=table_data,
        )
        return dataset_class(
            in_storage=dict_storage,
            in_table_name=table_name,
        )

    resources = Resources()
    for key in data_dictionary.keys():
        if key not in self.datasets:
            resources.merge({key: data_dictionary[key]})
            continue

        data = data_dictionary[key]
        id_name = self.id_names[key]
        if id_name not in data.keys() and not isinstance(id_name, list):
            # add id array, as long as the first attribute in the dictionary
            n_entries = len(data[data.keys()[0]])
            data[id_name] = arange(1, n_entries + 1)

        if key == "land_cover":
            lc = dataset_from_dict(LandCoverDataset, 'land_cover', data)
            # add relative_x and relative_y
            lc.get_id_attribute()
            n = int(ceil(sqrt(lc.size())))
            if "relative_x" not in data.keys():
                x = (indices((n, n)) + 1)[1].ravel()
                lc.add_attribute(x[0:lc.size()], "relative_x", metadata=1)
            if "relative_y" not in data.keys():
                y = (indices((n, n)) + 1)[0].ravel()
                lc.add_attribute(y[0:lc.size()], "relative_y", metadata=1)
            resources.merge({key: lc})
        elif key == "gridcell":
            gridcell_dataset = dataset_from_dict(GridcellDataset, 'gridcell', data)
            resources.merge({key: gridcell_dataset})

    if dataset not in self.interactions:
        resources.merge({"dataset": resources[dataset]})
    resources.merge({"check_variables": '*', "debug": 4})
    return resources
def do_test_on_expected_data(self, input_variables_list, input_resources=None,
                             element_atol=None, sum_atol=None):
    """Compute ``self.variable_name`` and compare it with the stored values.

    The expected values are read from the 1995 folder of the package's
    "small_test_set_opus" data.  The input data for the computation is
    produced by ``make_input_data`` in a temporary directory, which is
    removed again when the test ends.

    input_variables_list: passed to ``make_input_data``.
    input_resources: passed to ``compute_variables`` as ``resources``.
    element_atol: absolute tolerance of the element-wise comparison
        (default 1e-8).
    sum_atol: only reported in the diagnostics (default 1e-8).

    When the values differ, diagnostics are logged before the assertion
    fails.
    """
    from biocomplexity.datasets.land_cover_dataset import LandCoverDataset
    from biocomplexity.tests.utils.land_cover_tests_utils import make_input_data
    import os

    package_dir_path = package().get_package_path()
    flt_directory = os.path.join(package_dir_path, "data",
                                 "small_test_set_opus", "1995")
    # flt_directory = r"C:\eclipse\LCCM_small_test_set_converted\1991"
    expected_storage = StorageFactory().get_storage(
        'flt_storage', storage_location=flt_directory)
    expected_lc = LandCoverDataset(in_storage=expected_storage)
    expected_lc.load_dataset()

    temp_dir = make_input_data(flt_directory, input_variables_list)
    try:
        lc = LandCoverDataset(
            in_storage=StorageFactory().get_storage(
                'flt_storage', storage_location=temp_dir),
            out_storage=StorageFactory().get_storage(
                'flt_storage', storage_location=r"c:/tmp"))
        lc.load_dataset()

        pool_storage = StorageFactory().get_storage(
            'flt_storage', storage_location=temp_dir)
        dataset_pool = DatasetPool(package_order=['biocomplexity'],
                                   storage=pool_storage)
        dataset_pool._add_dataset('land_cover', lc)
        lc.compute_variables(self.variable_name,
                             resources=input_resources,
                             dataset_pool=dataset_pool)
        # lc.write_dataset(attributes='*')

        lc_values = lc.get_attribute(self.variable_name)
        expected_values = expected_lc.get_attribute(self.variable_name)

        if sum_atol is None:
            sum_atol = 1e-8
        if element_atol is None:
            element_atol = 1e-8

        if not ma.allclose(lc_values, expected_values, atol=element_atol):
            # Log diagnostics before the assertion below fails.
            logger.log_status(
                "comparision using element-atol=%f, sum-atol=%f"
                % (element_atol, sum_atol))
            logger.log_status(" computed expected")
            logger.log_status("sum: ", lc_values.sum(), expected_values.sum())
            logger.log_status("max: ", max(lc_values), max(expected_values))
            logger.log_status("min: ", min(lc_values), min(expected_values))

            pairs = list(zip(lc_values, expected_values))
            nonzero_computed = sum(1 for computed, _ in pairs if computed != 0)
            nonzero_expected = sum(1 for _, expected in pairs if expected != 0)
            logger.log_status("# non-zeros values: ",
                              nonzero_computed, nonzero_expected)
            logger.log_status(
                "max distance between 2 elements: %f"
                % self._max_distance_between_two_elements(lc_values,
                                                          expected_values))
            logger.log_status(lc_values, expected_values)

            count = sum(1 for computed, expected in pairs
                        if computed != expected)
            total = len(pairs)
            logger.log_status(
                "# different elements = %d, over %d, with a %f percentage"
                % (count, total, count * 1.0 / total))

        # self.assert_(ma.allclose(lc_values.sum(), expected_values.sum(), atol=sum_atol))
        self.assert_(
            ma.allclose(lc_values, expected_values, atol=element_atol))
    finally:
        if os.path.exists(temp_dir):
            rmtree(temp_dir)