Ejemplo n.º 1
0
    def _create_flt_file(self, current_year, flt_directory_in, flt_directory_out):
        """Convert land-cover output for ``current_year`` into .lf4/.hdr files.

        Reads a LandCoverDataset from ``flt_directory_in`` and writes the
        "lct" primary attribute plus any computed "probs*" attributes under
        ``flt_directory_out``/land_covers (computed attributes go into a
        "computed" subdirectory).  Output directories are created on demand.
        """
        logger.log_status("Convert output data for ", str(current_year))

        flt_directory_out = os.path.join(flt_directory_out, 'land_covers')

        if not os.path.exists(flt_directory_out):
            os.makedirs(flt_directory_out)

        lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
            'flt_storage', storage_location=flt_directory_in))
        relative_x = lc.get_attribute("relative_x")
        relative_y = lc.get_attribute("relative_y")
        # flat (row-major) cell index within the full grid
        flat_indices = relative_x * self.ncols * 1.0 + relative_y

        # bugfix: a slice expression never evaluates to None, so the old
        # "flat_indices[5*self.ncols:] is None" test was dead code; the
        # length check alone decides whether the 5-row offset applies.
        if len(flat_indices[5 * self.ncols:]) == 0:
            offset = 0
        else:
            offset = 5 * self.ncols

        logger.start_block("Converting")
        try:
            for attr_name in lc.get_primary_attribute_names():
                if attr_name not in ["relative_x", "relative_y"]:
                    attr_name = "lct"  #-------------- only output lct now
                    logger.log_status("    ", attr_name)
                    # replace masked cells with the configured nodata value
                    attr = ma.filled(lc.get_attribute(attr_name), self.nodata_values).astype(float32)
                    self._create_flt_file2(os.path.join(flt_directory_out, attr_name + ".lf4"), attr, flat_indices, offset)
                    self._create_header(os.path.join(flt_directory_out, attr_name + ".hdr"))  #<-- added 26 may 09 by mm
                    del attr
                    break  #-------------- only output lct now

            lc.load_dataset(attributes='*')
            if lc.get_computed_attribute_names() is not None:
                flt_directory_out = os.path.join(flt_directory_out, "computed")
                if not os.path.exists(flt_directory_out):
                    os.makedirs(flt_directory_out)
                for attr_name in lc.get_computed_attribute_names():
                    # only the probability rasters ("probs*") are exported
                    if attr_name not in ["_hidden_id_"] and attr_name[0:5] == "probs":
                        logger.log_status("    ", attr_name)
                        attr = ma.filled(lc.get_attribute(attr_name), self.nodata_values).astype(float32)
                        self._create_flt_file2(os.path.join(flt_directory_out, attr_name + ".lf4"), attr, flat_indices, offset)
                        self._create_header(os.path.join(flt_directory_out, attr_name + ".hdr"))  #<-- added 26 may 09 by mm
                        del attr
        finally:
            #            lc.flush_dataset() # added 23 jun 2009 - not tested...
            logger.end_block()
#index_attribute = sys.argv[2]
# name of the boolean sampling attribute that selects which cells to export
index_attribute = "lc0207_100k_0"

# 4. Years - date pair of input data; year is concatenated to flt_directory_in specified in #1
#years = [1991, 1995]
#years = [1995, 1999]
#years = [2002]
#years = sys.argv[3]
years = [2007, 2007]

# first land-cover dataset: read from <flt_directory_in>/<year>,
# written to <flt_directory_out>/<year>
lc1 =  LandCoverDataset(in_storage = StorageFactory().get_storage('flt_storage', 
        storage_location = os.path.join(flt_directory_in, str(years[0]))),
    out_storage = StorageFactory().get_storage('flt_storage', 
        storage_location = os.path.join(flt_directory_out, str(years[0]))))

# indices of the cells flagged (non-zero) by the sampling attribute
agents_index = where(lc1.get_attribute(index_attribute))[0]
lc1subset = DatasetSubset(lc1, agents_index)
print "Writing set 1:"
for attr in lc1.get_primary_attribute_names():
    print "   ", attr
    lc1subset.write_dataset(attributes=[attr], out_table_name="land_covers")
    lc1.delete_one_attribute(attr) # leaving this line in causes the processing of every other input data file; commenting it causes memory error
    
# second land-cover dataset (second year of the pair); the SAME agents_index
# is reused so both sets cover identical cells
lc2 =  LandCoverDataset(in_storage = StorageFactory().get_storage('flt_storage', 
        storage_location = os.path.join(flt_directory_in, str(years[1]))),
    out_storage = StorageFactory().get_storage('flt_storage',
        storage_location = os.path.join(flt_directory_out, str(years[1]))))
                  
lc2subset = DatasetSubset(lc2, agents_index)
print "Writing set 2:"
for attr in lc2.get_primary_attribute_names():
Ejemplo n.º 3
0
class LCCMEstimator(Estimator):
    """Estimates and calibrates the land-cover-change model (LCCM).

    ``__init__`` loads the sampled estimation datasets (lc1/lc2) and the
    full-extent datasets (lc1_all/lc2_all); ``estimate`` runs the model and
    writes the resulting specification and calibrated coefficients to tab
    storage.  The commented-out lines are the supported alternative data
    configurations; exactly one of each numbered choice should be active.
    """

    def __init__(self, **kargs):
        #        Estimator.__init__(self, settings=None, run_land_price_model_before_estimation=False, **kargs) # <-- old __init__
        #        Estimator.__init__(self, config=None, save_estimation_results=True) # <-- new __init__ doesn't work, but not needed

        parent_dir_path = package().get_package_parent_path()
        package_path = OpusPackage().get_path_for_package("biocomplexity")
        self.storage = StorageFactory().get_storage(
            'tab_storage', storage_location=os.path.join(package_path, 'data'))

        ## 1. directory path of full (4 county spatial extent) dataset
        flt_directory = os.path.join(parent_dir_path, "biocomplexity", "data",
                                     "LCCM_4County")

        ## 2. select (uncomment) from one the following choices of directory pathes of subsetted sample input data/variables
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_small_test_set_opus")
        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity",
                                         "data", "data_for_estimation_all")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all_orig")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_suburban_orig")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban_orig")

        ## note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
        ## note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999

        ## 3. select (uncomment) from one the following choices of land cover data (input data) date pairs (years)
        #        years = [1991, 1995]
        years = [1995, 1999]
        #        years = [1999, 2002]

        # sampled (subsetted) datasets used for the estimation proper
        self.lc1 = LandCoverDataset(in_storage=StorageFactory().get_storage(
            "flt_storage",
            storage_location=os.path.join(flt_directory_est, str(years[0]))),
                                    resources=Resources({"lowercase": 1}))
        self.lc2 = LandCoverDataset(in_storage=StorageFactory().get_storage(
            "flt_storage",
            storage_location=os.path.join(flt_directory_est, str(years[1]))),
                                    resources=Resources({"lowercase": 1}))

        # full-extent datasets, needed for the calibration step; flushed
        # immediately to keep memory usage down
        self.lc1_all = LandCoverDataset(
            in_storage=StorageFactory().get_storage(
                "flt_storage",
                storage_location=os.path.join(flt_directory, str(years[0]))),
            resources=Resources({"lowercase": 1}))
        self.lc1_all.flush_dataset()
        self.lc2_all = LandCoverDataset(
            in_storage=StorageFactory().get_storage(
                "flt_storage",
                storage_location=os.path.join(flt_directory, str(years[1]))),
            resources=Resources({"lowercase": 1}))
        self.lc2_all.flush_dataset()

    def estimate(self, spec_py=None, spec_var=None, spec_file=None):
        """Estimate and calibrate the LCCM, then persist the results.

        Supply exactly one of ``spec_py`` (a module exposing a
        ``specification`` dict), ``spec_var`` (the dict itself) or
        ``spec_file`` (a table name in ``self.storage``).

        NOTE(review): the out_suffix computation below dereferences
        spec_py, so the spec_var/spec_file-only paths would fail there —
        confirm intended usage before relying on them.
        """
        t1 = time()
        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification
        if spec_var is not None:
            self.specification, variables, coefficients, equations, submodels = \
                self.load_specification_from_variable(spec_var)
        elif spec_file is not None:
            self.specification = EquationSpecification(in_storage=self.storage)
            self.specification.load(in_table_name=spec_file)

        self.specification.set_dataset_name_of_variables("land_cover")

        self.model_name = "land_cover_change_model"
        choices = range(1, 15)  # land-cover type ids 1..14
        lccm = LandCoverChangeModel(choices, submodel_string="lct")

        ## 4. select (uncomment) from one the following choices of subsetted sampling files (agents_index)
        #        agents_index = where(self.lc1.get_attribute("sall_91_95_0"))[0]
        agents_index = where(self.lc1.get_attribute("sall_95_99_0"))[0]
        #        agents_index = where(self.lc1.get_attribute("sall_99_02_0b"))[0]
        #        agents_index = where(self.lc1.get_attribute("sa9902_9195_0"))[0]
        #        agents_index = where(self.lc1.get_attribute("sa9902_9599_0"))[0]
        #        agents_index = where(self.lc1.get_attribute("suburb91_95sample0"))[0]
        #        agents_index = where(self.lc1.get_attribute("suburb95_99sample0"))[0]
        #        agents_index = where(self.lc1.get_attribute("up91x95_old_samp0"))[0]
        #        agents_index = where(self.lc1.get_attribute("urbsamp95_99_0"))[0]

        ## need to include agents_index_all seperate for the calibration portion
        ##    when using the dataset at the full extent, agents_index_all is needed as it is
        ##    created from the lc1_all agents_set and matches the size of the input data

        ## 5. select (uncomment) from one the following choices of sampling files (agents_index) at full spatial extent
        #        agents_index_all = where(self.lc1_all.get_attribute("sall_91_95_0"))[0]
        agents_index_all = where(self.lc1_all.get_attribute("sall_95_99_0"))[0]
        #        agents_index_all = where(self.lc1_all.get_attribute("sall_99_02_0b"))[0]
        #        agents_index_all = where(self.lc1_all.get_attribute("sa9902_9195_0"))[0]
        #        agents_index_all = where(self.lc1_all.get_attribute("sa9902_9599_0"))[0]
        #        agents_index_all = where(self.lc1_all.get_attribute("suburb91_95sample0"))[0]
        #        agents_index_all = where(self.lc1_all.get_attribute("suburb95_99sample0"))[0]
        #        agents_index_all = where(self.lc1_all.get_attribute("up91x95_old_samp0"))[0]
        #        agents_index_all = where(self.lc1_all.get_attribute("urbsamp95_99_0"))[0]

        coef, results = lccm.estimate(self.specification,
                                      self.lc1,
                                      self.lc2,
                                      agents_index=agents_index,
                                      debuglevel=4)
        new_coef = lccm.calibrate(self.lc1_all, self.lc2_all, agents_index_all)
        specification = lccm.specification

        # save estimation results under a suffix derived from the spec module name
        #        out_suffix = spec_py.__name__[len(spec_py.__name__) - 11:]
        out_suffix = spec_py.__name__[30:]
        specification.write(out_storage=self.storage,
                            out_table_name='lccm_specification_%sc' %
                            out_suffix)
        new_coef.write(out_storage=self.storage,
                       out_table_name='lccm_coefficients_%sc' % out_suffix)

        logger.log_status("Estimation done. %s s" % str(time() - t1))

    def load_specification_from_variable(self, spec_var):
        """Flatten a {submodel: {variable: coefficient-list}} dict into
        parallel lists and build an EquationSpecification from them.

        An optional "equation_ids" entry in a submodel dict supplies the
        equation ids (it is removed from the dict); otherwise ids
        1..len(coefs) are generated.  Zero coefficients are skipped.
        Raises ValueError on any malformed specification.
        """
        variables = []
        coefficients = []
        equations = []
        submodels = []
        try:
            for sub_model, submodel_spec in spec_var.items():
                if not isinstance(submodel_spec, dict):
                    raise ValueError("Wrong specification format")
                if "equation_ids" in submodel_spec:
                    ## this retrieves eq_ids from spec.py - they're stored in equations then passed to the equation specifications
                    equation_ids = submodel_spec["equation_ids"]
                    del submodel_spec["equation_ids"]
                else:
                    equation_ids = None
                for var, coefs in submodel_spec.items():
                    if not equation_ids:
                        # bugfix: was len(coeffs) -- an undefined name that
                        # raised NameError, silently masked by the except below
                        equation_ids = range(1, len(coefs) + 1)
                    for i in range(len(coefs)):
                        if coefs[i] != 0:
                            variables.append(var)
                            coefficients.append(coefs[i])
                            equations.append(equation_ids[i])
                            submodels.append(sub_model)
        except Exception:
            raise ValueError("Wrong specification format for submodel variable.")

        specification = EquationSpecification(variables=variables,
                                              coefficients=coefficients,
                                              equations=equations,
                                              submodels=submodels)
        return (specification, variables, coefficients, equations, submodels)
Ejemplo n.º 4
0
     
     test_flag = options.test_flag        
 
     flt_directory_in = options.input
     flt_directory_out = options.output
 
     logger.log_status("Convert output data for ", str(current_year))
 
 #todo: how to get 'land_covers' from dataset?
 flt_directory_out = os.path.join(flt_directory_out, 'land_covers')    
 
 if not os.path.exists(flt_directory_out):
     os.makedirs(flt_directory_out)
 
 lc =  LandCoverDataset(in_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_in))
 relative_x = lc.get_attribute("relative_x")
 relative_y = lc.get_attribute("relative_y")
 flat_indices = relative_x * ncols * 1.0 + relative_y
 
 if flat_indices[5*ncols:] is None or len(flat_indices[5*ncols:]) == 0:
     offset = 0
 else:
     offset = 5*ncols
 
 #if os.path.exists("indices.lf4"):
 #    os.remove("indices.lf4")
 #flat_indices.tofile("indices.lf4")
 
 logger.start_block("Converting")
 try:    
     for attr_name in lc.get_primary_attribute_names():
Ejemplo n.º 5
0
    def _create_flt_file(self, current_year, flt_directory_in,
                         flt_directory_out):
        """Convert land-cover output for ``current_year`` into .lf4/.hdr files.

        Reads a LandCoverDataset from ``flt_directory_in`` and writes the
        "lct" primary attribute plus any computed "probs*" attributes under
        ``flt_directory_out``/land_covers (computed attributes go into a
        "computed" subdirectory).  Output directories are created on demand.
        """
        logger.log_status("Convert output data for ", str(current_year))

        flt_directory_out = os.path.join(flt_directory_out, 'land_covers')

        if not os.path.exists(flt_directory_out):
            os.makedirs(flt_directory_out)

        lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
            'flt_storage', storage_location=flt_directory_in))
        relative_x = lc.get_attribute("relative_x")
        relative_y = lc.get_attribute("relative_y")
        # flat (row-major) cell index within the full grid
        flat_indices = relative_x * self.ncols * 1.0 + relative_y

        # NOTE(review): a slice never evaluates to None, so the first test
        # is dead code -- the len() check alone decides the offset; confirm.
        if flat_indices[5 * self.ncols:] is None or len(
                flat_indices[5 * self.ncols:]) == 0:
            offset = 0
        else:
            offset = 5 * self.ncols

        logger.start_block("Converting")
        try:
            for attr_name in lc.get_primary_attribute_names():
                if attr_name not in ["relative_x", "relative_y"]:
                    attr_name = "lct"  #-------------- only output lct now
                    logger.log_status("    ", attr_name)
                    # replace masked cells with the configured nodata value
                    attr = ma.filled(lc.get_attribute(attr_name),
                                     self.nodata_values).astype(float32)
                    self._create_flt_file2(
                        os.path.join(flt_directory_out, attr_name + ".lf4"),
                        attr, flat_indices, offset)
                    self._create_header(
                        os.path.join(flt_directory_out, attr_name +
                                     ".hdr"))  #<-- added 26 may 09 by mm
                    del attr
                    break  #-------------- only output lct now

            lc.load_dataset(attributes='*')
            if lc.get_computed_attribute_names() is not None:
                flt_directory_out = os.path.join(flt_directory_out, "computed")
                if not os.path.exists(flt_directory_out):
                    os.makedirs(flt_directory_out)
                for attr_name in lc.get_computed_attribute_names():
                    if attr_name not in ["_hidden_id_"]:
                        # only the probability rasters ("probs*") are exported
                        if attr_name[0:5] == "probs":
                            logger.log_status("    ", attr_name)
                            attr = ma.filled(
                                lc.get_attribute(attr_name),
                                self.nodata_values).astype(float32)
                            self._create_flt_file2(
                                os.path.join(flt_directory_out,
                                             attr_name + ".lf4"), attr,
                                flat_indices, offset)
                            self._create_header(
                                os.path.join(
                                    flt_directory_out, attr_name +
                                    ".hdr"))  #<-- added 26 may 09 by mm
                            del attr
        finally:
            #            lc.flush_dataset() # added 23 jun 2009 - not tested...
            logger.end_block()
Ejemplo n.º 6
0
# 4. Years - date pair of input data; year is concatenated to flt_directory_in specified in #1
#years = [1991, 1995]
#years = [1995, 1999]
#years = [2002]
#years = sys.argv[3]
years = [2007, 2007]

# first land-cover dataset: read from <flt_directory_in>/<year>,
# written to <flt_directory_out>/<year>
lc1 = LandCoverDataset(in_storage=StorageFactory().get_storage(
    'flt_storage',
    storage_location=os.path.join(flt_directory_in, str(years[0]))),
                       out_storage=StorageFactory().get_storage(
                           'flt_storage',
                           storage_location=os.path.join(
                               flt_directory_out, str(years[0]))))

# indices of the cells flagged (non-zero) by the sampling attribute
agents_index = where(lc1.get_attribute(index_attribute))[0]
lc1subset = DatasetSubset(lc1, agents_index)
print "Writing set 1:"
for attr in lc1.get_primary_attribute_names():
    print "   ", attr
    lc1subset.write_dataset(attributes=[attr], out_table_name="land_covers")
    lc1.delete_one_attribute(
        attr
    )  # leaving this line in causes the processing of every other input data file; commenting it causes memory error

lc2 = LandCoverDataset(in_storage=StorageFactory().get_storage(
    'flt_storage',
    storage_location=os.path.join(flt_directory_in, str(years[1]))),
                       out_storage=StorageFactory().get_storage(
                           'flt_storage',
                           storage_location=os.path.join(
Ejemplo n.º 7
0
class LCCMEstimator(Estimator):
    """Estimates and calibrates the land-cover-change model (LCCM).

    ``__init__`` loads the sampled estimation datasets (lc1/lc2) and the
    full-extent datasets (lc1_all/lc2_all); ``estimate`` runs the model and
    writes the resulting specification and calibrated coefficients to tab
    storage.  The commented-out lines are the supported alternative data
    configurations; exactly one of each numbered choice should be active.
    """

    def __init__(self, **kargs):
#        Estimator.__init__(self, settings=None, run_land_price_model_before_estimation=False, **kargs) # <-- old __init__
#        Estimator.__init__(self, config=None, save_estimation_results=True) # <-- new __init__ doesn't work, but not needed

        parent_dir_path = package().get_package_parent_path()
        package_path = OpusPackage().get_path_for_package("biocomplexity")
        self.storage = StorageFactory().get_storage('tab_storage',
            storage_location=os.path.join(package_path, 'data'))

        ## 1. directory path of full (4 county spatial extent) dataset
        flt_directory = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_4County")

        ## 2. select (uncomment) from one the following choices of directory pathes of subsetted sample input data/variables
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_small_test_set_opus")
        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all")
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all_orig")
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_suburban_orig")
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban")
#        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban_orig")

        ## note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
        ## note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999

        ## 3. select (uncomment) from one the following choices of land cover data (input data) date pairs (years)
#        years = [1991, 1995]
        years = [1995, 1999]
#        years = [1999, 2002]

        # sampled (subsetted) datasets used for the estimation proper
        self.lc1 = LandCoverDataset(in_storage=StorageFactory().get_storage("flt_storage",
            storage_location=os.path.join(flt_directory_est, str(years[0]))),
            resources=Resources({"lowercase": 1}))
        self.lc2 = LandCoverDataset(in_storage=StorageFactory().get_storage("flt_storage",
            storage_location=os.path.join(flt_directory_est, str(years[1]))),
            resources=Resources({"lowercase": 1}))

        # full-extent datasets, needed for the calibration step; flushed
        # immediately to keep memory usage down
        self.lc1_all = LandCoverDataset(in_storage=StorageFactory().get_storage("flt_storage",
            storage_location=os.path.join(flt_directory, str(years[0]))),
            resources=Resources({"lowercase": 1}))
        self.lc1_all.flush_dataset()
        self.lc2_all = LandCoverDataset(in_storage=StorageFactory().get_storage("flt_storage",
            storage_location=os.path.join(flt_directory, str(years[1]))),
            resources=Resources({"lowercase": 1}))
        self.lc2_all.flush_dataset()

    def estimate(self, spec_py=None, spec_var=None, spec_file=None):
        """Estimate and calibrate the LCCM, then persist the results.

        Supply exactly one of ``spec_py`` (a module exposing a
        ``specification`` dict), ``spec_var`` (the dict itself) or
        ``spec_file`` (a table name in ``self.storage``).

        NOTE(review): the out_suffix computation below dereferences
        spec_py, so the spec_var/spec_file-only paths would fail there —
        confirm intended usage before relying on them.
        """
        t1 = time()
        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification
        if spec_var is not None:
            self.specification, variables, coefficients, equations, submodels = \
                self.load_specification_from_variable(spec_var)
        elif spec_file is not None:
            self.specification = EquationSpecification(in_storage=self.storage)
            self.specification.load(in_table_name=spec_file)

        self.specification.set_dataset_name_of_variables("land_cover")

        self.model_name = "land_cover_change_model"
        choices = range(1, 15)  # land-cover type ids 1..14
        lccm = LandCoverChangeModel(choices, submodel_string="lct")

        ## 4. select (uncomment) from one the following choices of subsetted sampling files (agents_index)
#        agents_index = where(self.lc1.get_attribute("sall_91_95_0"))[0]
        agents_index = where(self.lc1.get_attribute("sall_95_99_0"))[0]
#        agents_index = where(self.lc1.get_attribute("sall_99_02_0b"))[0]
#        agents_index = where(self.lc1.get_attribute("sa9902_9195_0"))[0]
#        agents_index = where(self.lc1.get_attribute("sa9902_9599_0"))[0]
#        agents_index = where(self.lc1.get_attribute("suburb91_95sample0"))[0]
#        agents_index = where(self.lc1.get_attribute("suburb95_99sample0"))[0]
#        agents_index = where(self.lc1.get_attribute("up91x95_old_samp0"))[0]
#        agents_index = where(self.lc1.get_attribute("urbsamp95_99_0"))[0]

        ## need to include agents_index_all seperate for the calibration portion
        ##    when using the dataset at the full extent, agents_index_all is needed as it is
        ##    created from the lc1_all agents_set and matches the size of the input data

        ## 5. select (uncomment) from one the following choices of sampling files (agents_index) at full spatial extent
#        agents_index_all = where(self.lc1_all.get_attribute("sall_91_95_0"))[0]
        agents_index_all = where(self.lc1_all.get_attribute("sall_95_99_0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("sall_99_02_0b"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("sa9902_9195_0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("sa9902_9599_0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("suburb91_95sample0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("suburb95_99sample0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("up91x95_old_samp0"))[0]
#        agents_index_all = where(self.lc1_all.get_attribute("urbsamp95_99_0"))[0]

        coef, results = lccm.estimate(self.specification, self.lc1, self.lc2, agents_index=agents_index, debuglevel=4)
        new_coef = lccm.calibrate(self.lc1_all, self.lc2_all, agents_index_all)
        specification = lccm.specification

        # save estimation results under a suffix derived from the spec module name
#        out_suffix = spec_py.__name__[len(spec_py.__name__) - 11:]
        out_suffix = spec_py.__name__[30:]
        specification.write(out_storage=self.storage, out_table_name='lccm_specification_%sc' % out_suffix)
        new_coef.write(out_storage=self.storage, out_table_name='lccm_coefficients_%sc' % out_suffix)

        logger.log_status("Estimation done. %s s" % str(time()-t1))

    def load_specification_from_variable(self, spec_var):
        """Flatten a {submodel: {variable: coefficient-list}} dict into
        parallel lists and build an EquationSpecification from them.

        An optional "equation_ids" entry in a submodel dict supplies the
        equation ids (it is removed from the dict); otherwise ids
        1..len(coefs) are generated.  Zero coefficients are skipped.
        Raises ValueError on any malformed specification.
        """
        variables = []
        coefficients = []
        equations = []
        submodels = []
        try:
            for sub_model, submodel_spec in spec_var.items():
                if not isinstance(submodel_spec, dict):
                    raise ValueError("Wrong specification format")
                if "equation_ids" in submodel_spec:
                    equation_ids = submodel_spec["equation_ids"] ## this retrieves eq_ids from spec.py - they're stored in equations then passed to the equation specifications
                    del submodel_spec["equation_ids"]
                else:
                    equation_ids = None
                for var, coefs in submodel_spec.items():
                    if not equation_ids:
                        # bugfix: was len(coeffs) -- an undefined name that
                        # raised NameError, silently masked by the except below
                        equation_ids = range(1, len(coefs)+1)
                    for i in range(len(coefs)):
                        if coefs[i] != 0:
                            variables.append(var)
                            coefficients.append(coefs[i])
                            equations.append(equation_ids[i])
                            submodels.append(sub_model)
        except Exception:
            raise ValueError("Wrong specification format for submodel variable.")

        specification = EquationSpecification(variables=variables,
                                              coefficients=coefficients,
                                              equations=equations,
                                              submodels=submodels)
        return (specification, variables, coefficients, equations, submodels)
Ejemplo n.º 8
0
        test_flag = options.test_flag

        flt_directory_in = options.input
        flt_directory_out = options.output

        logger.log_status("Convert output data for ", str(current_year))

    #todo: how to get 'land_covers' from dataset?
    flt_directory_out = os.path.join(flt_directory_out, 'land_covers')

    if not os.path.exists(flt_directory_out):
        os.makedirs(flt_directory_out)

    lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
        'flt_storage', storage_location=flt_directory_in))
    relative_x = lc.get_attribute("relative_x")
    relative_y = lc.get_attribute("relative_y")
    flat_indices = relative_x * ncols * 1.0 + relative_y

    if flat_indices[5 * ncols:] is None or len(flat_indices[5 * ncols:]) == 0:
        offset = 0
    else:
        offset = 5 * ncols

    #if os.path.exists("indices.lf4"):
    #    os.remove("indices.lf4")
    #flat_indices.tofile("indices.lf4")

    logger.start_block("Converting")
    try:
        for attr_name in lc.get_primary_attribute_names():
Ejemplo n.º 9
0
 def do_test_on_expected_data(self, input_variables_list, input_resources=None,
                              element_atol=None, sum_atol=None):
     """Compute ``self.variable_name`` on generated input data and assert it
     matches the expected values stored with the small opus test set.

     input_variables_list -- variables passed to make_input_data to build
         the temporary input directory.
     input_resources -- optional resources forwarded to compute_variables.
     element_atol / sum_atol -- absolute tolerances (default 1e-8 each).

     On mismatch a detailed comparison is logged before the assertion fails.
     The temporary input directory is always removed.
     """
     from biocomplexity.datasets.land_cover_dataset import LandCoverDataset
     from biocomplexity.tests.utils.land_cover_tests_utils import make_input_data

     import os

     package_dir_path = package().get_package_path()
     flt_directory = os.path.join(package_dir_path, "data", "small_test_set_opus", "1995")
     #flt_directory = r"C:\eclipse\LCCM_small_test_set_converted\1991"
     expected_lc = LandCoverDataset(in_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory))
     expected_lc.load_dataset()

     temp_dir = make_input_data(flt_directory, input_variables_list)
     try:
         lc = LandCoverDataset(in_storage = StorageFactory().get_storage(
             'flt_storage',
             storage_location=temp_dir),
             out_storage = StorageFactory().get_storage('flt_storage', storage_location = r"c:/tmp"))
         lc.load_dataset()

         dataset_pool = DatasetPool(
             package_order=['biocomplexity'],
             storage=StorageFactory().get_storage('flt_storage', storage_location=temp_dir))
         dataset_pool._add_dataset('land_cover', lc)
         lc.compute_variables(self.variable_name, resources=input_resources,
                              dataset_pool=dataset_pool)

         #lc.write_dataset(attributes='*')

         lc_values = lc.get_attribute(self.variable_name)
         expected_values = expected_lc.get_attribute(self.variable_name)

         if sum_atol is None: sum_atol = 1e-8
         if element_atol is None: element_atol = 1e-8

         if not ma.allclose(lc_values, expected_values, atol=element_atol):
             # dump a detailed comparison before the assertion below fails
             logger.log_status("comparision using element-atol=%f, sum-atol=%f" % (element_atol, sum_atol))
             logger.log_status("      computed      expected")
             logger.log_status("sum: ", lc_values.sum(), expected_values.sum())
             logger.log_status("max: ", max(lc_values), max(expected_values))
             logger.log_status("min: ", min(lc_values), min(expected_values))

             # counts of non-zero entries on each side (over paired elements)
             c1 = sum(1 for (i, j) in zip(lc_values, expected_values) if i != 0)
             c2 = sum(1 for (i, j) in zip(lc_values, expected_values) if j != 0)
             logger.log_status("# non-zeros values: ", c1, c2)
             logger.log_status("max distance between 2 elements: %f" %
                               self._max_distance_between_two_elements(lc_values, expected_values))
             logger.log_status(lc_values, expected_values)

             count = 0
             total = 0
             for (i, j) in zip(lc_values, expected_values):
                 if i != j:
                     count += 1
                 total += 1
             logger.log_status("# different elements = %d, over %d, with a %f percentage"
                               % (count, total, count * 1.0 / total))

         #self.assert_(ma.allclose(lc_values.sum(), expected_values.sum(), atol=sum_atol))
         self.assert_(ma.allclose(lc_values, expected_values, atol=element_atol))
     finally:
         if os.path.exists(temp_dir):
             rmtree(temp_dir)
Ejemplo n.º 10
0
    def do_test_on_expected_data(self,
                                 input_variables_list,
                                 input_resources=None,
                                 element_atol=None,
                                 sum_atol=None):
        """Compute ``self.variable_name`` on generated input data and assert
        it matches the expected values stored with the small opus test set.

        input_variables_list -- variables passed to make_input_data to build
            the temporary input directory.
        input_resources -- optional resources forwarded to compute_variables.
        element_atol / sum_atol -- absolute tolerances (default 1e-8 each).

        On mismatch a detailed comparison is logged before the assertion
        fails.  The temporary input directory is always removed.
        """
        from biocomplexity.datasets.land_cover_dataset import LandCoverDataset
        from biocomplexity.tests.utils.land_cover_tests_utils import make_input_data

        import os

        package_dir_path = package().get_package_path()
        flt_directory = os.path.join(package_dir_path, "data",
                                     "small_test_set_opus", "1995")
        #flt_directory = r"C:\eclipse\LCCM_small_test_set_converted\1991"
        expected_lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
            'flt_storage', storage_location=flt_directory))
        expected_lc.load_dataset()

        temp_dir = make_input_data(flt_directory, input_variables_list)
        try:
            lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
                'flt_storage', storage_location=temp_dir),
                                  out_storage=StorageFactory().get_storage(
                                      'flt_storage',
                                      storage_location=r"c:/tmp"))
            lc.load_dataset()

            dataset_pool = DatasetPool(package_order=['biocomplexity'],
                                       storage=StorageFactory().get_storage(
                                           'flt_storage',
                                           storage_location=temp_dir))
            dataset_pool._add_dataset('land_cover', lc)
            lc.compute_variables(self.variable_name,
                                 resources=input_resources,
                                 dataset_pool=dataset_pool)

            #lc.write_dataset(attributes='*')

            lc_values = lc.get_attribute(self.variable_name)
            expected_values = expected_lc.get_attribute(self.variable_name)

            if sum_atol is None: sum_atol = 1e-8
            if element_atol is None: element_atol = 1e-8

            # on mismatch, log a detailed comparison before the assert below
            if (not ma.allclose(lc_values, expected_values,
                                atol=element_atol)):
                logger.log_status(
                    "comparision using element-atol=%f, sum-atol=%f" %
                    (element_atol, sum_atol))
                logger.log_status("      computed      expected")
                logger.log_status("sum: ", lc_values.sum(),
                                  expected_values.sum())
                logger.log_status("max: ", max(lc_values),
                                  max(expected_values))
                logger.log_status("min: ", min(lc_values),
                                  min(expected_values))

                # counts of non-zero entries on each side
                c1 = 0
                c2 = 0
                for (i, j) in zip(lc_values, expected_values):
                    if i != 0:
                        c1 = c1 + 1
                    if j != 0:
                        c2 = c2 + 1

                logger.log_status("# non-zeros values: ", c1, c2)
                logger.log_status("max distance between 2 elements: %f" %
                                  self._max_distance_between_two_elements(
                                      lc_values, expected_values))
                logger.log_status(lc_values, expected_values)
                # count element-wise differences over the paired values
                count = 0
                total = 0
                for (i, j) in zip(lc_values, expected_values):
                    if i != j:
                        count = count + 1
                    total = total + 1
                logger.log_status("# different elements = %d, over %d, with a %f percentage" \
                                % (count, total, count*1.0/total))

            #self.assert_(ma.allclose(lc_values.sum(), expected_values.sum(), atol=sum_atol))
            self.assert_(
                ma.allclose(lc_values, expected_values, atol=element_atol))
        finally:
            # always remove the generated temporary input directory
            if os.path.exists(temp_dir):
                rmtree(temp_dir)