def _create_flt_file(self, current_year, flt_directory_in, flt_directory_out):
    logger.log_status("Convert output data for ", str(current_year))
    flt_directory_out = os.path.join(flt_directory_out, 'land_covers')
    if not os.path.exists(flt_directory_out):
        os.makedirs(flt_directory_out)
    lc = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage',
                                                                  storage_location=flt_directory_in))
    relative_x = lc.get_attribute("relative_x")
    relative_y = lc.get_attribute("relative_y")
    flat_indices = relative_x * self.ncols * 1.0 + relative_y
    if flat_indices[5*self.ncols:] is None or len(flat_indices[5*self.ncols:]) == 0:
        offset = 0
    else:
        offset = 5*self.ncols
    logger.start_block("Converting")
    try:
        for attr_name in lc.get_primary_attribute_names():
            if attr_name not in ["relative_x", "relative_y"]:
                attr_name = "lct"  #-------------- only output lct now
                logger.log_status(" ", attr_name)
                attr = ma.filled(lc.get_attribute(attr_name), self.nodata_values).astype(float32)
                self._create_flt_file2(os.path.join(flt_directory_out, attr_name + ".lf4"),
                                       attr, flat_indices, offset)
                self._create_header(os.path.join(flt_directory_out, attr_name + ".hdr"))  #<-- added 26 may 09 by mm
                del attr
                break  #-------------- only output lct now
        lc.load_dataset(attributes='*')
        if lc.get_computed_attribute_names() is not None:
            flt_directory_out = os.path.join(flt_directory_out, "computed")
            if not os.path.exists(flt_directory_out):
                os.makedirs(flt_directory_out)
            for attr_name in lc.get_computed_attribute_names():
                if attr_name not in ["_hidden_id_"]:
                    if attr_name[0:5] == "probs":
                        logger.log_status(" ", attr_name)
                        attr = ma.filled(lc.get_attribute(attr_name), self.nodata_values).astype(float32)
                        self._create_flt_file2(os.path.join(flt_directory_out, attr_name + ".lf4"),
                                               attr, flat_indices, offset)
                        self._create_header(os.path.join(flt_directory_out, attr_name + ".hdr"))  #<-- added 26 may 09 by mm
                        del attr
    finally:
        # lc.flush_dataset() # added 23 jun 2009 - not tested...
        logger.end_block()
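# Illustrative sketch (not part of the model code above): how relative_x/relative_y
# collapse into the row-major flat index computed by _create_flt_file. Assumes
# relative_x counts rows and relative_y counts columns of an ncols-wide grid; the
# values below are invented for illustration.
from numpy import array

ncols = 4                       # hypothetical grid width
relative_x = array([0, 0, 1])   # row of each cell
relative_y = array([1, 3, 2])   # column of each cell

flat_indices = relative_x * ncols * 1.0 + relative_y
print flat_indices              # [ 1.  3.  6.] -> positions in the flattened ncols-wide grid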
#index_attribute = sys.argv[2]
index_attribute = "lc0207_100k_0"

# 4. Years - date pair of input data; year is concatenated to flt_directory_in specified in #1
#years = [1991, 1995]
#years = [1995, 1999]
#years = [2002]
#years = sys.argv[3]
years = [2007, 2007]

lc1 = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage',
                           storage_location=os.path.join(flt_directory_in, str(years[0]))),
                       out_storage=StorageFactory().get_storage('flt_storage',
                           storage_location=os.path.join(flt_directory_out, str(years[0]))))
agents_index = where(lc1.get_attribute(index_attribute))[0]
lc1subset = DatasetSubset(lc1, agents_index)
print "Writing set 1:"
for attr in lc1.get_primary_attribute_names():
    print " ", attr
    lc1subset.write_dataset(attributes=[attr], out_table_name="land_covers")
    lc1.delete_one_attribute(attr)  # leaving this line in causes the processing of every other input data file; commenting it causes memory error

lc2 = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage',
                           storage_location=os.path.join(flt_directory_in, str(years[1]))),
                       out_storage=StorageFactory().get_storage('flt_storage',
                           storage_location=os.path.join(flt_directory_out, str(years[1]))))
lc2subset = DatasetSubset(lc2, agents_index)
print "Writing set 2:"
for attr in lc2.get_primary_attribute_names():
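# Illustrative only: the where(...)[0] idiom above turns a 0/1 sampling-mask
# attribute into the agents_index used by DatasetSubset. The mask values below are
# invented; in the script the mask comes from lc1.get_attribute(index_attribute).
from numpy import array, where

sample_mask = array([0, 1, 0, 1, 1])   # hypothetical per-cell sample flags
agents_index = where(sample_mask)[0]   # indices of the non-zero (sampled) cells
print agents_index                     # [1 3 4]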
class LCCMEstimator(Estimator):
    def __init__(self, **kargs):
        # Estimator.__init__(self, settings=None, run_land_price_model_before_estimation=False, **kargs) # <-- old __init__
        # Estimator.__init__(self, config=None, save_estimation_results=True) # <-- new __init__ doesn't work, but not needed
        parent_dir_path = package().get_package_parent_path()
        package_path = OpusPackage().get_path_for_package("biocomplexity")
        self.storage = StorageFactory().get_storage('tab_storage',
            storage_location=os.path.join(package_path, 'data'))

        ## 1. directory path of the full (4-county spatial extent) dataset
        flt_directory = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_4County")

        ## 2. select (uncomment) one of the following directory paths of subsetted sample input data/variables
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_small_test_set_opus")
        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all")
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all_orig")
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_suburban_orig")
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban")
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban_orig")
        ## note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
        ## note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999

        ## 3. select (uncomment) one of the following land cover data (input data) date pairs (years)
        # years = [1991, 1995]
        years = [1995, 1999]
        # years = [1999, 2002]

        self.lc1 = LandCoverDataset(in_storage=StorageFactory().get_storage("flt_storage",
                storage_location=os.path.join(flt_directory_est, str(years[0]))),
            resources=Resources({"lowercase": 1}))
        self.lc2 = LandCoverDataset(in_storage=StorageFactory().get_storage("flt_storage",
                storage_location=os.path.join(flt_directory_est, str(years[1]))),
            resources=Resources({"lowercase": 1}))
        self.lc1_all = LandCoverDataset(in_storage=StorageFactory().get_storage("flt_storage",
                storage_location=os.path.join(flt_directory, str(years[0]))),
            resources=Resources({"lowercase": 1}))
        self.lc1_all.flush_dataset()
        self.lc2_all = LandCoverDataset(in_storage=StorageFactory().get_storage("flt_storage",
                storage_location=os.path.join(flt_directory, str(years[1]))),
            resources=Resources({"lowercase": 1}))
        self.lc2_all.flush_dataset()

    def estimate(self, spec_py=None, spec_var=None, spec_file=None):
        t1 = time()
        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification
        if spec_var is not None:
            self.specification, variables, coefficients, equations, submodels = \
                self.load_specification_from_variable(spec_var)
        elif spec_file is not None:
            self.specification = EquationSpecification(in_storage=self.storage)
            self.specification.load(in_table_name=spec_file)
        self.specification.set_dataset_name_of_variables("land_cover")

        self.model_name = "land_cover_change_model"
        choices = range(1, 15)
        lccm = LandCoverChangeModel(choices, submodel_string="lct")

        ## 4. select (uncomment) one of the following subsetted sampling files (agents_index)
        # agents_index = where(self.lc1.get_attribute("sall_91_95_0"))[0]
        agents_index = where(self.lc1.get_attribute("sall_95_99_0"))[0]
        # agents_index = where(self.lc1.get_attribute("sall_99_02_0b"))[0]
        # agents_index = where(self.lc1.get_attribute("sa9902_9195_0"))[0]
        # agents_index = where(self.lc1.get_attribute("sa9902_9599_0"))[0]
        # agents_index = where(self.lc1.get_attribute("suburb91_95sample0"))[0]
        # agents_index = where(self.lc1.get_attribute("suburb95_99sample0"))[0]
        # agents_index = where(self.lc1.get_attribute("up91x95_old_samp0"))[0]
        # agents_index = where(self.lc1.get_attribute("urbsamp95_99_0"))[0]

        ## a separate agents_index_all is needed for the calibration portion:
        ## when using the dataset at the full extent, agents_index_all is created
        ## from the lc1_all agents_set and matches the size of the input data
        ## 5. select (uncomment) one of the following sampling files (agents_index) at full spatial extent
        # agents_index_all = where(self.lc1_all.get_attribute("sall_91_95_0"))[0]
        agents_index_all = where(self.lc1_all.get_attribute("sall_95_99_0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("sall_99_02_0b"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("sa9902_9195_0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("sa9902_9599_0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("suburb91_95sample0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("suburb95_99sample0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("up91x95_old_samp0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("urbsamp95_99_0"))[0]

        coef, results = lccm.estimate(self.specification, self.lc1, self.lc2,
                                      agents_index=agents_index, debuglevel=4)
        new_coef = lccm.calibrate(self.lc1_all, self.lc2_all, agents_index_all)
        specification = lccm.specification

        # save estimation results
        # out_suffix = spec_py.__name__[len(spec_py.__name__) - 11:]
        out_suffix = spec_py.__name__[30:]
        specification.write(out_storage=self.storage,
                            out_table_name='lccm_specification_%sc' % out_suffix)
        new_coef.write(out_storage=self.storage,
                       out_table_name='lccm_coefficients_%sc' % out_suffix)
        logger.log_status("Estimation done. %s s" % str(time() - t1))

    def load_specification_from_variable(self, spec_var):
        variables = []
        coefficients = []
        equations = []
        submodels = []
        try:
            for sub_model, submodel_spec in spec_var.items():
                if not isinstance(submodel_spec, dict):
                    raise ValueError, "Wrong specification format"
                if submodel_spec.has_key("equation_ids"):
                    ## this retrieves eq_ids from spec.py - they're stored in equations, then passed to the equation specification
                    equation_ids = submodel_spec["equation_ids"]
                    del submodel_spec["equation_ids"]
                else:
                    equation_ids = None
                for var, coefs in submodel_spec.items():
                    if not equation_ids:
                        equation_ids = range(1, len(coefs) + 1)
                    for i in range(len(coefs)):
                        if coefs[i] != 0:
                            variables.append(var)
                            coefficients.append(coefs[i])
                            equations.append(equation_ids[i])
                            submodels.append(sub_model)
        except:
            raise ValueError, "Wrong specification format for submodel variable."
        specification = EquationSpecification(variables=variables,
                                              coefficients=coefficients,
                                              equations=equations,
                                              submodels=submodels)
        return (specification, variables, coefficients, equations, submodels)
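# Hypothetical driver, shown only to make the calling convention of LCCMEstimator
# concrete: estimate() expects a specification module passed as spec_py (it calls
# reload() on it and reads its `specification` attribute). The module name below is
# invented for illustration; substitute the actual lccm specification module.
if __name__ == "__main__":
    from biocomplexity.estimation import lccm_specification_all95to99 as spec_py  # hypothetical module name
    estimator = LCCMEstimator()
    estimator.estimate(spec_py=spec_py)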
test_flag = options.test_flag
flt_directory_in = options.input
flt_directory_out = options.output

logger.log_status("Convert output data for ", str(current_year))
#todo: how to get 'land_covers' from dataset?
flt_directory_out = os.path.join(flt_directory_out, 'land_covers')
if not os.path.exists(flt_directory_out):
    os.makedirs(flt_directory_out)
lc = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage',
                                                              storage_location=flt_directory_in))
relative_x = lc.get_attribute("relative_x")
relative_y = lc.get_attribute("relative_y")
flat_indices = relative_x * ncols * 1.0 + relative_y
if flat_indices[5*ncols:] is None or len(flat_indices[5*ncols:]) == 0:
    offset = 0
else:
    offset = 5*ncols
#if os.path.exists("indices.lf4"):
#    os.remove("indices.lf4")
#flat_indices.tofile("indices.lf4")
logger.start_block("Converting")
try:
    for attr_name in lc.get_primary_attribute_names():
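# Mechanical sketch of the offset rule used above, on invented data. A slice of a
# 1-D numpy array is never None, so the condition effectively asks whether the
# array extends past index 5*ncols; if it does, offset is set to 5*ncols.
from numpy import arange

ncols = 3
flat_indices = arange(20)   # hypothetical 20-cell dataset
offset = 5 * ncols if len(flat_indices[5 * ncols:]) > 0 else 0
print offset                # 15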
def do_test_on_expected_data(self, input_variables_list, input_resources=None,
                             element_atol=None, sum_atol=None):
    from biocomplexity.datasets.land_cover_dataset import LandCoverDataset
    from biocomplexity.tests.utils.land_cover_tests_utils import make_input_data
    import os
    package_dir_path = package().get_package_path()
    flt_directory = os.path.join(package_dir_path, "data", "small_test_set_opus", "1995")
    #flt_directory = r"C:\eclipse\LCCM_small_test_set_converted\1991"
    expected_lc = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage',
                                                                           storage_location=flt_directory))
    expected_lc.load_dataset()
    temp_dir = make_input_data(flt_directory, input_variables_list)
    try:
        lc = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage',
                                                                      storage_location=temp_dir),
                              out_storage=StorageFactory().get_storage('flt_storage',
                                                                       storage_location=r"c:/tmp"))
        lc.load_dataset()
        dataset_pool = DatasetPool(package_order=['biocomplexity'],
                                   storage=StorageFactory().get_storage('flt_storage',
                                                                        storage_location=temp_dir))
        dataset_pool._add_dataset('land_cover', lc)
        lc.compute_variables(self.variable_name, resources=input_resources,
                             dataset_pool=dataset_pool)
        #lc.write_dataset(attributes='*')
        lc_values = lc.get_attribute(self.variable_name)
        expected_values = expected_lc.get_attribute(self.variable_name)
        if sum_atol is None:
            sum_atol = 1e-8
        if element_atol is None:
            element_atol = 1e-8
        if not ma.allclose(lc_values, expected_values, atol=element_atol):
            logger.log_status("comparison using element-atol=%f, sum-atol=%f" % (element_atol, sum_atol))
            logger.log_status(" computed expected")
            logger.log_status("sum: ", lc_values.sum(), expected_values.sum())
            logger.log_status("max: ", max(lc_values), max(expected_values))
            logger.log_status("min: ", min(lc_values), min(expected_values))
            c1 = 0
            c2 = 0
            for (i, j) in zip(lc_values, expected_values):
                if i != 0:
                    c1 = c1 + 1
                if j != 0:
                    c2 = c2 + 1
            logger.log_status("# non-zero values: ", c1, c2)
            logger.log_status("max distance between 2 elements: %f"
                              % self._max_distance_between_two_elements(lc_values, expected_values))
            logger.log_status(lc_values, expected_values)
            count = 0
            total = 0
            for (i, j) in zip(lc_values, expected_values):
                if i != j:
                    count = count + 1
                total = total + 1
            logger.log_status("# different elements = %d, over %d, with a %f percentage"
                              % (count, total, count*1.0/total))
        #self.assert_(ma.allclose(lc_values.sum(), expected_values.sum(), atol=sum_atol))
        self.assert_(ma.allclose(lc_values, expected_values, atol=element_atol))
    finally:
        if os.path.exists(temp_dir):
            rmtree(temp_dir)
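# Illustrative only, on invented toy data: the difference between the element-wise
# check the test asserts on and the aggregate (sum) check it leaves commented out.
# A per-cell error can fail the element-wise comparison even when the errors cancel
# in the sum, which is why the test compares element-wise with element_atol.
from numpy import ma, array

computed = array([1.001, 1.999, 3.0])
expected = array([1.0, 2.0, 3.0])

print ma.allclose(computed, expected, atol=1e-8)              # False: per-cell error ~1e-3
print ma.allclose(computed.sum(), expected.sum(), atol=1e-8)  # True: the errors cancel in the sum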