def get_resources(self, data_dictionary, dataset):
        """Create resources for computing a variable.

        Arguments:
            data_dictionary -- mapping of names to values.  An entry whose
                name is listed in ``self.datasets`` is expected to be a table
                (mapping of attribute name to array); every other entry is
                copied into the resources unchanged.
            dataset -- name of the entry that is additionally exposed under
                the key "dataset", unless it is listed in
                ``self.interactions``.

        Returns the Resources object, which also carries the
        "check_variables" and "debug" settings.

        Note: of the names in ``self.datasets`` only "land_cover" and
        "gridcell" are turned into dataset objects; any other such name
        contributes nothing to the result.
        """
        resources = Resources()
        for key, data in data_dictionary.items():
            if key not in self.datasets:
                resources.merge({key: data})
                continue

            id_name = self.id_names[key]
            # Check the type first: a list-valued id name is unhashable and
            # cannot be looked up in a dict key view.
            if not isinstance(id_name, list) and id_name not in data.keys():
                # No id column supplied: number the rows 1..n, where n is the
                # length of the first attribute of the table.
                first_attribute = list(data.keys())[0]
                data[id_name] = arange(1, len(data[first_attribute]) + 1)

            if key == "land_cover":
                land_cover_storage = StorageFactory().get_storage('dict_storage')
                land_cover_table_name = 'land_cover'
                land_cover_storage.write_table(
                    table_name=land_cover_table_name,
                    table_data=data,
                )
                lc = LandCoverDataset(
                    in_storage=land_cover_storage,
                    in_table_name=land_cover_table_name,
                )

                # Add relative_x and relative_y when the table lacks them: the
                # cells are laid out row by row on the smallest n x n grid
                # that holds them, using 1-based coordinates.
                lc.get_id_attribute()  # result unused; presumably forces the table to load - TODO confirm
                n = int(ceil(sqrt(lc.size())))
                if "relative_x" not in data.keys():
                    x = (indices((n, n)) + 1)[1].ravel()
                    lc.add_attribute(x[0:lc.size()], "relative_x", metadata=1)
                if "relative_y" not in data.keys():
                    y = (indices((n, n)) + 1)[0].ravel()
                    lc.add_attribute(y[0:lc.size()], "relative_y", metadata=1)

                resources.merge({key: lc})

            elif key == "gridcell":
                gridcell_storage = StorageFactory().get_storage('dict_storage')
                gridcell_table_name = 'gridcell'
                gridcell_storage.write_table(
                    table_name=gridcell_table_name,
                    table_data=data,
                )
                gridcell_dataset = GridcellDataset(
                    in_storage=gridcell_storage,
                    in_table_name=gridcell_table_name,
                )
                resources.merge({key: gridcell_dataset})

        if dataset not in self.interactions:
            resources.merge({"dataset": resources[dataset]})
        resources.merge({"check_variables": '*', "debug": 4})
        return resources
    def __init__(self, **kargs):
        """Open the storages and land cover datasets used by the estimation.

        Sets:
            self.storage  -- tab storage on the "data" directory of the
                             biocomplexity package
            self.lc1/lc2  -- land cover datasets of the estimation sample for
                             the first/second year of the chosen date pair
            self.lc1_all/lc2_all -- land cover datasets of the full (4 county)
                             extent for the same years; each one is flushed
                             right after it has been opened

        ``kargs`` is accepted but not used.
        """
        # The parent constructor is deliberately not invoked:
        #        Estimator.__init__(self, settings=None, run_land_price_model_before_estimation=False, **kargs) # <-- old __init__
        #        Estimator.__init__(self, config=None, save_estimation_results=True) # <-- new __init__ doesn't work, but not needed

        def open_land_cover(directory, year):
            # One flt-backed dataset per <directory>/<year> folder.
            flt_storage = StorageFactory().get_storage(
                "flt_storage", storage_location=os.path.join(directory, str(year))
            )
            return LandCoverDataset(in_storage=flt_storage, resources=Resources({"lowercase": 1}))

        parent_dir_path = package().get_package_parent_path()
        package_path = OpusPackage().get_path_for_package("biocomplexity")
        self.storage = StorageFactory().get_storage(
            "tab_storage", storage_location=os.path.join(package_path, "data")
        )
        data_root = os.path.join(parent_dir_path, "biocomplexity", "data")

        ## 1. directory path of full (4 county spatial extent) dataset
        flt_directory = os.path.join(data_root, "LCCM_4County")

        ## 2. directory of the subsetted sample input data/variables; to switch,
        ##    replace the folder name below with one of:
        ##      "LCCM_small_test_set_opus", "data_for_estimation_all_orig",
        ##      "data_for_suburban_orig", "data_for_urban", "data_for_urban_orig"
        flt_directory_est = os.path.join(data_root, "data_for_estimation_all")

        ## note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
        ## note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999

        ## 3. date pair (years) of the land cover input data; alternatives are
        ##    [1991, 1995] and [1995, 1999]
        first_year, second_year = 1999, 2002

        self.lc1 = open_land_cover(flt_directory_est, first_year)
        self.lc2 = open_land_cover(flt_directory_est, second_year)

        self.lc1_all = open_land_cover(flt_directory, first_year)
        self.lc1_all.flush_dataset()
        self.lc2_all = open_land_cover(flt_directory, second_year)
        self.lc2_all.flush_dataset()
Exemple #3
0
    def _create_flt_file(self, current_year, flt_directory_in, flt_directory_out):

        logger.log_status("Convert output data for ", str(current_year))
        
        flt_directory_out = os.path.join(flt_directory_out, 'land_covers')    
        
        if not os.path.exists(flt_directory_out):
            os.makedirs(flt_directory_out)

        lc =  LandCoverDataset(in_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_in))
        relative_x = lc.get_attribute("relative_x")
        relative_y = lc.get_attribute("relative_y")
        flat_indices = relative_x * self.ncols * 1.0 + relative_y
        
        if flat_indices[5*self.ncols:] is None or len(flat_indices[5*self.ncols:]) == 0:
            offset = 0
        else:
            offset = 5*self.ncols
        
        logger.start_block("Converting")
        try:    
            for attr_name in lc.get_primary_attribute_names():
                if attr_name not in ["relative_x", "relative_y"]:
                    attr_name = "lct" #-------------- only output lct now
                    logger.log_status("    ", attr_name)
                    attr = ma.filled(lc.get_attribute(attr_name), self.nodata_values).astype(float32)
                    self._create_flt_file2(os.path.join(flt_directory_out, attr_name+".lf4"), attr, flat_indices, offset)
                    self._create_header(os.path.join(flt_directory_out, attr_name+".hdr")) #<-- added 26 may 09 by mm
                    del attr
                    break #-------------- only output lct now
                    
            lc.load_dataset(attributes='*')
            if lc.get_computed_attribute_names() is not None:        
                flt_directory_out = os.path.join(flt_directory_out, "computed")
                if not os.path.exists(flt_directory_out):
                    os.makedirs(flt_directory_out)
                for attr_name in lc.get_computed_attribute_names():
                    if attr_name not in ["_hidden_id_"]:
                      if attr_name[0:5] == "probs":
                        logger.log_status("    ", attr_name)
                        attr = ma.filled(lc.get_attribute(attr_name), self.nodata_values).astype(float32)
                        self._create_flt_file2(os.path.join(flt_directory_out, attr_name+".lf4"), attr, flat_indices, offset)
                        self._create_header(os.path.join(flt_directory_out, attr_name+".hdr")) #<-- added 26 may 09 by mm
                        del attr
        finally:
#            lc.flush_dataset() # added 23 jun 2009 - not tested...
            logger.end_block()
 def _copy_invariants_to_temp_land_cover_dir(self, land_cover_path):
     """Write the primary land cover attributes into the temporary directory.

     The dataset is read from the flt storage at ``land_cover_path`` and its
     primary attributes are written as table 'land_covers' to the flt storage
     at ``self.temp_land_cover_dir``.
     """
     logger.log_status("temp input land cover data in " + self.temp_land_cover_dir)
     source_storage = StorageFactory().get_storage("flt_storage", storage_location=land_cover_path)
     target_storage = StorageFactory().get_storage("flt_storage", storage_location=self.temp_land_cover_dir)
     dataset = LandCoverDataset(in_storage=source_storage,
                                out_storage=target_storage,
                                out_table_name='land_covers',
                                debuglevel=4)
     logger.log_status("Land cover dataset created.... ") # added dec 4, 2009
     dataset.flush_dataset() # added dec 4, 2009
     dataset.write_dataset(attributes=AttributeType.PRIMARY)
Exemple #5
0
    def run(self, base_directory, urbansim_cache_directory, years):
        """ run the simulation
                base_directory: flt storage location of the land cover data; it is
                    used as-is for every year (joining the year onto it is
                    currently disabled) and serves as both input and output.
                urbansim_cache_directory: directory set as the cache directory of
                    the simulation state.
                years: list of years to run; the model is run once per year."""
        model = LandCoverChangeModel(self.possible_lcts,
                                     submodel_string=self.lct_attribute,
                                     choice_attribute_name=self.lct_attribute,
                                     debuglevel=4)

        # Coefficients and specification both come from the package's "data" tab storage.
        coefficients = Coefficients()
        data_location = os.path.join(self.package_path, 'data')
        storage = StorageFactory().get_storage('tab_storage', storage_location=data_location)
        coefficients.load(in_storage=storage, in_table_name="land_cover_change_model_coefficients")
        specification = EquationSpecification(in_storage=storage)
        specification.load(in_table_name="land_cover_change_model_specification")
        specification.set_variable_prefix("biocomplexity.land_cover.")

        constants = Constants()
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(urbansim_cache_directory)
        attribute_cache = AttributeCache()
        cell_index = arange(100000)  # presumably restricts each run to the first 100000 cells - TODO confirm

        for year in years:
            simulation_state.set_current_time(year)
            #land_cover_path = os.path.join(base_directory, str(year))
            source = StorageFactory().get_storage('flt_storage', storage_location=base_directory)
            target = StorageFactory().get_storage('flt_storage', storage_location=base_directory)
            land_covers = LandCoverDataset(in_storage=source, out_storage=target, debuglevel=4)
            land_covers.subset_by_index(cell_index)
            #land_covers.load_dataset()
            gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4)

            model.run(specification, coefficients, land_covers,
                      data_objects={"gridcell": gridcells,
                                    "constants": constants,
                                    "flush_variables": True},
                      chunk_specification={'nchunks': 1})
            land_covers.flush_dataset()
            # Drop the per-year datasets before the next year is loaded.
            del gridcells
            del land_covers
Exemple #6
0
    def __init__(self, **kargs):
        """Set up the tab storage and the four land cover datasets.

        self.storage is a tab storage on the biocomplexity package's 'data'
        directory.  self.lc1 / self.lc2 hold the estimation sample for the two
        years of the selected date pair; self.lc1_all / self.lc2_all hold the
        full (4 county) extent for the same years and are flushed immediately
        after being opened.  ``kargs`` is accepted but not used.
        """
        # Estimator.__init__ is intentionally skipped:
        #        Estimator.__init__(self, settings=None, run_land_price_model_before_estimation=False, **kargs) # <-- old __init__
        #        Estimator.__init__(self, config=None, save_estimation_results=True) # <-- new __init__ doesn't work, but not needed

        def load(directory, year):
            # Land cover dataset backed by the flt files in <directory>/<year>.
            location = os.path.join(directory, str(year))
            return LandCoverDataset(
                in_storage=StorageFactory().get_storage(
                    "flt_storage", storage_location=location),
                resources=Resources({"lowercase": 1}))

        parent_dir_path = package().get_package_parent_path()
        package_path = OpusPackage().get_path_for_package("biocomplexity")
        self.storage = StorageFactory().get_storage(
            'tab_storage', storage_location=os.path.join(package_path, 'data'))

        data_dir = os.path.join(parent_dir_path, "biocomplexity", "data")

        ## 1. directory path of full (4 county spatial extent) dataset
        flt_directory = os.path.join(data_dir, "LCCM_4County")

        ## 2. directory of the subsetted sample input data/variables; other
        ##    available folder names: "LCCM_small_test_set_opus",
        ##    "data_for_estimation_all_orig", "data_for_suburban_orig",
        ##    "data_for_urban", "data_for_urban_orig"
        flt_directory_est = os.path.join(data_dir, "data_for_estimation_all")

        ## note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
        ## note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999

        ## 3. land cover data (input data) date pair (years); the other
        ##    choices are [1991, 1995] and [1999, 2002]
        years = [1995, 1999]

        self.lc1 = load(flt_directory_est, years[0])
        self.lc2 = load(flt_directory_est, years[1])

        self.lc1_all = load(flt_directory, years[0])
        self.lc1_all.flush_dataset()
        self.lc2_all = load(flt_directory, years[1])
        self.lc2_all.flush_dataset()
    def run(self, base_directory, urbansim_cache_directory, years):
        """ run the simulation
                base_directory: flt storage location holding the land cover data;
                    the same location is read from and flushed to for every year
                    (the per-year subdirectory join is commented out).
                urbansim_cache_directory: cache directory given to the
                    simulation state.
                years: list of years; one model run is made per entry."""

        def flt_storage():
            # A fresh flt storage on base_directory for each use.
            return StorageFactory().get_storage(
                'flt_storage', storage_location=base_directory)

        model = LandCoverChangeModel(
            self.possible_lcts,
            submodel_string=self.lct_attribute,
            choice_attribute_name=self.lct_attribute,
            debuglevel=4)

        coefficients = Coefficients()
        storage = StorageFactory().get_storage(
            'tab_storage',
            storage_location=os.path.join(self.package_path, 'data'))
        coefficients.load(
            in_storage=storage,
            in_table_name="land_cover_change_model_coefficients")

        specification = EquationSpecification(in_storage=storage)
        specification.load(
            in_table_name="land_cover_change_model_specification")
        specification.set_variable_prefix("biocomplexity.land_cover.")

        constants = Constants()
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(urbansim_cache_directory)
        attribute_cache = AttributeCache()
        # presumably limits each run to the first 100000 cells - TODO confirm
        subset_index = arange(100000)

        for year in years:
            simulation_state.set_current_time(year)
            #land_cover_path = os.path.join(base_directory, str(year))
            land_covers = LandCoverDataset(in_storage=flt_storage(),
                                           out_storage=flt_storage(),
                                           debuglevel=4)
            land_covers.subset_by_index(subset_index)
            #land_covers.load_dataset()
            gridcells = GridcellDataset(in_storage=attribute_cache,
                                        debuglevel=4)

            model.run(specification,
                      coefficients,
                      land_covers,
                      data_objects={
                          "gridcell": gridcells,
                          "constants": constants,
                          "flush_variables": True
                      },
                      chunk_specification={'nchunks': 1})
            land_covers.flush_dataset()
            # Release this year's datasets before moving on.
            del gridcells
            del land_covers
 def compute_computed_variables(self, base_directory, urbansim_cache_directory, years):
     """Compute the land cover output variables for the first year and write them out.

     base_directory: directory containing one land cover folder per year.
     urbansim_cache_directory: flt storage location of the gridcell data.
     years: list of years; only years[0] is processed.

     The variables named in self.land_cover_urbansim_output_variables are
     computed on the land cover dataset of <base_directory>/<years[0]>, and
     all attributes of that dataset are then written back to the same
     location.
     """
     # Imported locally so the fix does not depend on the module's import block.
     from opus_core.datasets.dataset_pool import DatasetPool

     year = years[0]
     land_cover_path = os.path.join(base_directory, str(year))
     land_covers = LandCoverDataset(
         in_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path),
         out_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path))
     land_covers.load_dataset()

     gridcell_storage = StorageFactory().get_storage('flt_storage',
                                                     storage_location=urbansim_cache_directory)
     gridcells = GridcellDataset(in_storage=gridcell_storage)
     gridcells.load_dataset()

     # The pool was previously referenced without ever being created (NameError).
     # Register both loaded datasets so the variables can reach them.
     # NOTE(review): confirm package_order covers every package that defines
     # the requested variables.
     dataset_pool = DatasetPool(package_order=['biocomplexity'], storage=gridcell_storage)
     dataset_pool._add_dataset('land_cover', land_covers)
     dataset_pool._add_dataset('gridcell', gridcells)

     land_covers.compute_variables(self.land_cover_urbansim_output_variables,
                                   dataset_pool=dataset_pool)
     land_covers.write_dataset(attributes='*')
     #land_covers.flush_dataset()
Exemple #9
0
 def compute_computed_variables(self, base_directory, urbansim_cache_directory, years):
     """Compute the land cover output variables for the first year and write them out.

     base_directory: directory containing one land cover folder per year.
     urbansim_cache_directory: flt storage location of the gridcell data.
     years: list of years; only years[0] is processed.

     The variables named in self.land_cover_urbansim_output_variables are
     computed on the land cover dataset of <base_directory>/<years[0]>, and
     all attributes of that dataset are then written back to the same
     location.
     """
     # Imported locally so the fix does not depend on the module's import block.
     from opus_core.datasets.dataset_pool import DatasetPool

     year = years[0]
     land_cover_path = os.path.join(base_directory, str(year))
     land_covers = LandCoverDataset(
         in_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path),
         out_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path))
     land_covers.load_dataset()

     gridcell_storage = StorageFactory().get_storage('flt_storage',
                                                     storage_location=urbansim_cache_directory)
     gridcells = GridcellDataset(in_storage=gridcell_storage)
     gridcells.load_dataset()

     # The pool was previously referenced without ever being created (NameError).
     # Register both loaded datasets so the variables can reach them.
     # NOTE(review): confirm package_order covers every package that defines
     # the requested variables.
     dataset_pool = DatasetPool(package_order=['biocomplexity'], storage=gridcell_storage)
     dataset_pool._add_dataset('land_cover', land_covers)
     dataset_pool._add_dataset('gridcell', gridcells)

     land_covers.compute_variables(self.land_cover_urbansim_output_variables,
                                   dataset_pool=dataset_pool)
     land_covers.write_dataset(attributes='*')
     #land_covers.flush_dataset()
Exemple #10
0
        
        # Fragment of a conversion script: writes the relative_x / relative_y
        # attributes of a land cover dataset to a fresh output directory.
        # NOTE(review): the enclosing block header and the definition of
        # `options` are not part of this fragment.

        # Year label from the first command-line argument; within this
        # fragment it is only used in the log message below.
        input_year = sys.argv[1]   
    
        flt_directory_in = options.input
        flt_directory_out = options.output
        
        print flt_directory_out
        
        test_flag = options.test_flag
        
        # Start from an empty output directory.  shutil.rmtree raises if the
        # directory does not exist yet.
        shutil.rmtree(flt_directory_out)
        os.mkdir(flt_directory_out)
        
        logger.log_status("Convert input data from ", str(input_year))
    
    # Read from the input flt storage, write to the output flt storage.
    lc =  LandCoverDataset(in_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_in), 
        out_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_out), debuglevel=4)
    
    lc.get_header()
    
#    mask = lc.get_mask()
#    idx = where(mask==0)[0]
#    lcsubset = DatasetSubset(lc, idx)
    print "Creating and writing relative_x and relative_y:"
    # Each coordinate attribute is written and then removed from the dataset
    # before the next one is handled - presumably to limit memory use.
    # NOTE(review): `valuetypes` is not defined in this fragment - confirm it
    # is set earlier in the script.
    lc.write_dataset(attributes=["relative_x"], out_table_name="land_covers",
                                valuetypes=valuetypes)
    lc.delete_one_attribute("relative_x")
    lc.write_dataset(attributes=["relative_y"], out_table_name="land_covers",
                                valuetypes=valuetypes)
    lc.delete_one_attribute("relative_y")
    
    print "done."
Exemple #11
0
# Opus/UrbanSim urban simulation software.
# Copyright (C) 2005-2009 University of Washington
# See opus_core/LICENSE

from biocomplexity.datasets.land_cover_dataset import LandCoverDataset
from biocomplexity.opus_package_info import package
from opus_core.storage_factory import StorageFactory
import os

# Location of the 1991 sample of the small LCCM test set.
parent_dir_path = package().get_package_parent_path()
flt_directory = os.path.join(
    parent_dir_path, "biocomplexity", "data", "LCCM_small_test_set_opus", "1991"
)

if __name__ == "__main__":
    # Print a summary of the dataset, then print the name of every attribute
    # and plot it as a map titled with that name.
    flt_storage = StorageFactory().get_storage("flt_storage", storage_location=flt_directory)
    lc = LandCoverDataset(in_storage=flt_storage)
    lc.summary()
    for attribute_name in lc.get_attribute_names():
        print(attribute_name)
        lc.plot_map(attribute_name, main=attribute_name)
#index_attribute = "sall_99_02_0b"
#index_attribute = "sa9902_9195_0"
#index_attribute = "sa9902_9599_0"
#index_attribute = "sall_99_02_0v1"
#index_attribute = sys.argv[2]
# Name of the attribute that flags which cells belong to the subset.
index_attribute = "lc0207_100k_0"

# 4. Years - date pair of input data; year is concatenated to flt_directory_in specified in #1
#years = [1991, 1995]
#years = [1995, 1999]
#years = [2002]
#years = sys.argv[3]
years = [2007, 2007]

lc1 = LandCoverDataset(
    in_storage=StorageFactory().get_storage(
        'flt_storage',
        storage_location=os.path.join(flt_directory_in, str(years[0]))),
    out_storage=StorageFactory().get_storage(
        'flt_storage',
        storage_location=os.path.join(flt_directory_out, str(years[0]))))

# Positions of the cells whose index attribute is non-zero.
agents_index = where(lc1.get_attribute(index_attribute))[0]
lc1subset = DatasetSubset(lc1, agents_index)
print("Writing set 1:")
# Iterate over a snapshot of the names.  Each attribute is deleted once it has
# been written (keeping them all loaded ran out of memory), and deleting from
# the dataset while looping over its own name list made the loop skip every
# other attribute.
for attr in list(lc1.get_primary_attribute_names()):
    print("    " + str(attr))
    lc1subset.write_dataset(attributes=[attr], out_table_name="land_covers")
    lc1.delete_one_attribute(attr)

lc2 = LandCoverDataset(
    in_storage=StorageFactory().get_storage(
        'flt_storage',
        storage_location=os.path.join(flt_directory_in, str(years[1]))),
    out_storage=StorageFactory().get_storage(
        'flt_storage',
        storage_location=os.path.join(flt_directory_out, str(years[1]))))
                  
Exemple #13
0
        # Fragment of a conversion script: writes relative_x / relative_y for
        # the cells whose mask value is 0.
        # NOTE(review): the enclosing block header and the definitions of
        # `options` and `input_year` are not part of this fragment.
        flt_directory_in = options.input
        flt_directory_out = options.output

        print flt_directory_out

        test_flag = options.test_flag

        # Clearing the output directory is disabled, so existing content is kept.
        #        shutil.rmtree(flt_directory_out)
        #        os.mkdir(flt_directory_out)

        logger.log_status("Convert input data from ", str(input_year))

    # Read from the input flt storage, write to the output flt storage.
    lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
        'flt_storage', storage_location=flt_directory_in),
                          out_storage=StorageFactory().get_storage(
                              'flt_storage',
                              storage_location=flt_directory_out))

    lc.get_header()  # added 23 june 2009 by mm
    # Keep only the cells whose mask value equals 0.
    mask = lc.get_mask()
    idx = where(mask == 0)[0]
    lcsubset = DatasetSubset(lc, idx)
    print "Converting:"
    lcsubset.write_dataset(attributes=["relative_x"],
                           out_table_name="land_covers")
    #lcsubset.write_dataset(attributes=["relative_x"], out_table_name="land_covers",
    #                            valuetypes=valuetypes)
    # relative_x is removed from the full dataset once its subset has been
    # written - presumably to free memory before relative_y is handled.
    lc.delete_one_attribute("relative_x")
    lcsubset.write_dataset(attributes=["relative_y"],
                           out_table_name="land_covers")
Exemple #14
0
    def do_test_on_expected_data(self,
                                 input_variables_list,
                                 input_resources=None,
                                 element_atol=None,
                                 sum_atol=None):
        """Compute self.variable_name and compare it with the stored expected values.

        The expected values are read from the package's
        data/small_test_set_opus/1995 flt directory.  The inputs named in
        input_variables_list are copied to a temporary directory by
        make_input_data; the variable is computed on that copy and the
        temporary directory is removed afterwards.

        input_variables_list: passed to make_input_data.
        input_resources: passed to compute_variables as ``resources``.
        element_atol: absolute tolerance of the element-wise comparison
            (1e-8 when None).
        sum_atol: only reported in the diagnostic log (1e-8 when None); the
            assertion on the sums is disabled.

        On a mismatch, diagnostics are logged before the assertion fails.
        """
        from biocomplexity.datasets.land_cover_dataset import LandCoverDataset
        from biocomplexity.tests.utils.land_cover_tests_utils import make_input_data

        import os

        def flt_storage(location):
            return StorageFactory().get_storage('flt_storage',
                                                storage_location=location)

        if element_atol is None:
            element_atol = 1e-8
        if sum_atol is None:
            sum_atol = 1e-8

        flt_directory = os.path.join(package().get_package_path(), "data",
                                     "small_test_set_opus", "1995")
        #flt_directory = r"C:\eclipse\LCCM_small_test_set_converted\1991"
        expected_lc = LandCoverDataset(in_storage=flt_storage(flt_directory))
        expected_lc.load_dataset()

        temp_dir = make_input_data(flt_directory, input_variables_list)
        try:
            # NOTE(review): the output location is a hard-coded Windows path;
            # nothing is written in this method (write_dataset is disabled).
            lc = LandCoverDataset(in_storage=flt_storage(temp_dir),
                                  out_storage=flt_storage(r"c:/tmp"))
            lc.load_dataset()

            dataset_pool = DatasetPool(package_order=['biocomplexity'],
                                       storage=flt_storage(temp_dir))
            dataset_pool._add_dataset('land_cover', lc)
            lc.compute_variables(self.variable_name,
                                 resources=input_resources,
                                 dataset_pool=dataset_pool)

            #lc.write_dataset(attributes='*')

            lc_values = lc.get_attribute(self.variable_name)
            expected_values = expected_lc.get_attribute(self.variable_name)

            if not ma.allclose(lc_values, expected_values, atol=element_atol):
                logger.log_status(
                    "comparision using element-atol=%f, sum-atol=%f" %
                    (element_atol, sum_atol))
                logger.log_status("      computed      expected")
                logger.log_status("sum: ", lc_values.sum(),
                                  expected_values.sum())
                logger.log_status("max: ", max(lc_values),
                                  max(expected_values))
                logger.log_status("min: ", min(lc_values),
                                  min(expected_values))

                # Element pairs, truncated to the shorter of the two arrays.
                pairs = list(zip(lc_values, expected_values))
                nonzero_computed = len([1 for computed, _ in pairs
                                        if computed != 0])
                nonzero_expected = len([1 for _, expected in pairs
                                        if expected != 0])
                logger.log_status("# non-zeros values: ", nonzero_computed,
                                  nonzero_expected)

                max_distance = self._max_distance_between_two_elements(
                    lc_values, expected_values)
                logger.log_status("max distance between 2 elements: %f" %
                                  max_distance)
                logger.log_status(lc_values, expected_values)

                count = len([1 for computed, expected in pairs
                             if computed != expected])
                total = len(pairs)
                logger.log_status("# different elements = %d, over %d, with a %f percentage" \
                                % (count, total, count*1.0/total))

            #self.assert_(ma.allclose(lc_values.sum(), expected_values.sum(), atol=sum_atol))
            self.assert_(
                ma.allclose(lc_values, expected_values, atol=element_atol))
        finally:
            if os.path.exists(temp_dir):
                rmtree(temp_dir)
    def _create_flt_file(self, current_year, flt_directory_in,
                         flt_directory_out):
        """Export land cover attributes as .lf4 files with matching .hdr headers.

        current_year: only used in the log message.
        flt_directory_in: flt storage location the land cover dataset is read
            from.
        flt_directory_out: base output directory; files go to its
            'land_covers' subdirectory (created when missing).

        When the dataset has any primary attribute besides relative_x and
        relative_y, the "lct" attribute is written.  Afterwards all attributes
        are loaded and every computed attribute whose name starts with "probs"
        is written to the 'computed' subdirectory.  Masked values are filled
        with self.nodata_values and everything is stored as float32.
        """
        logger.log_status("Convert output data for ", str(current_year))

        land_covers_dir = os.path.join(flt_directory_out, 'land_covers')
        if not os.path.exists(land_covers_dir):
            os.makedirs(land_covers_dir)

        lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
            'flt_storage', storage_location=flt_directory_in))
        x_coordinates = lc.get_attribute("relative_x")
        y_coordinates = lc.get_attribute("relative_y")
        flat_indices = x_coordinates * self.ncols * 1.0 + y_coordinates

        # The offset is five grid rows' worth of cells unless nothing lies
        # beyond that point.
        five_rows = 5 * self.ncols
        beyond = flat_indices[five_rows:]
        offset = 0 if (beyond is None or len(beyond) == 0) else five_rows

        def write_attribute(name, target_dir):
            # Write <name>.lf4 and its header <name>.hdr into target_dir.
            logger.log_status("    ", name)
            data = ma.filled(lc.get_attribute(name),
                             self.nodata_values).astype(float32)
            self._create_flt_file2(os.path.join(target_dir, name + ".lf4"),
                                   data, flat_indices, offset)
            self._create_header(os.path.join(
                target_dir, name + ".hdr"))  #<-- added 26 may 09 by mm

        logger.start_block("Converting")
        try:
            # only output lct now: one export as soon as a primary attribute
            # other than the two coordinates is encountered
            for primary_name in lc.get_primary_attribute_names():
                if primary_name in ("relative_x", "relative_y"):
                    continue
                write_attribute("lct", land_covers_dir)
                break

            lc.load_dataset(attributes='*')
            if lc.get_computed_attribute_names() is not None:
                computed_dir = os.path.join(land_covers_dir, "computed")
                if not os.path.exists(computed_dir):
                    os.makedirs(computed_dir)
                for computed_name in lc.get_computed_attribute_names():
                    if computed_name == "_hidden_id_":
                        continue
                    if computed_name[0:5] == "probs":
                        write_attribute(computed_name, computed_dir)
        finally:
            #            lc.flush_dataset() # added 23 jun 2009 - not tested...
            logger.end_block()
        # Fragment of a conversion script: prepares the output directory and
        # the flat cell indices used to export land cover data.
        # NOTE(review): the enclosing block header and the definitions of
        # `options` and `ncols` are not part of this fragment.

        # Year label from the first command-line argument; within this
        # fragment it is only used in the log message below.
        current_year = sys.argv[1]

        test_flag = options.test_flag

        flt_directory_in = options.input
        flt_directory_out = options.output

        logger.log_status("Convert output data for ", str(current_year))

    #todo: how to get 'land_covers' from dataset?
    # Output goes to the 'land_covers' subdirectory, created when missing.
    flt_directory_out = os.path.join(flt_directory_out, 'land_covers')

    if not os.path.exists(flt_directory_out):
        os.makedirs(flt_directory_out)

    lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
        'flt_storage', storage_location=flt_directory_in))
    relative_x = lc.get_attribute("relative_x")
    relative_y = lc.get_attribute("relative_y")
    # One float index per cell, combining both coordinates.
    flat_indices = relative_x * ncols * 1.0 + relative_y

    # The offset is 5 * ncols unless no index lies beyond that position.
    if flat_indices[5 * ncols:] is None or len(flat_indices[5 * ncols:]) == 0:
        offset = 0
    else:
        offset = 5 * ncols

    #if os.path.exists("indices.lf4"):
    #    os.remove("indices.lf4")
    #flat_indices.tofile("indices.lf4")

    logger.start_block("Converting")
    try:
Exemple #17
0
#index_attribute = "sa9902_9599_0"
#index_attribute = "sall_99_02_0v1"
#index_attribute = sys.argv[2]
index_attribute = "lc0207_100k_0"

# 4. Years - date pair of input data; year is concatenated to flt_directory_in specified in #1
#years = [1991, 1995]
#years = [1995, 1999]
#years = [2002]
#years = sys.argv[3]
years = [2007, 2007]

lc1 = LandCoverDataset(in_storage=StorageFactory().get_storage(
    'flt_storage',
    storage_location=os.path.join(flt_directory_in, str(years[0]))),
                       out_storage=StorageFactory().get_storage(
                           'flt_storage',
                           storage_location=os.path.join(
                               flt_directory_out, str(years[0]))))

agents_index = where(lc1.get_attribute(index_attribute))[0]
lc1subset = DatasetSubset(lc1, agents_index)
print "Writing set 1:"
for attr in lc1.get_primary_attribute_names():
    print "   ", attr
    lc1subset.write_dataset(attributes=[attr], out_table_name="land_covers")
    lc1.delete_one_attribute(
        attr
    )  # leaving this line in causes the processing of every other input data file; commenting it causes memory error

lc2 = LandCoverDataset(in_storage=StorageFactory().get_storage(
class LCCMEstimator(Estimator):
    """Estimate and calibrate the land cover change model (LCCM).

    The constructor opens the sampled and the full-extent land cover datasets
    for a hard-coded pair of years.  ``estimate`` fits the model on the sample,
    calibrates it on the full extent and writes the resulting specification
    and coefficients to ``self.storage``.
    """

    def __init__(self, **kargs):
        """Open the tab storage and the four land cover datasets.

        ``kargs`` is accepted but not used, and ``Estimator.__init__`` is not
        called (see the commented-out calls below).
        """
        # Estimator.__init__(self, settings=None, run_land_price_model_before_estimation=False, **kargs) # <-- old __init__
        # Estimator.__init__(self, config=None, save_estimation_results=True) # <-- new __init__ doesn't work, but not needed

        parent_dir_path = package().get_package_parent_path()
        package_path = OpusPackage().get_path_for_package("biocomplexity")
        self.storage = StorageFactory().get_storage(
            'tab_storage',
            storage_location=os.path.join(package_path, 'data'))

        ## 1. directory path of full (4 county spatial extent) dataset
        flt_directory = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_4County")

        ## 2. select (uncomment) one of the following directory paths of subsetted sample input data/variables
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_small_test_set_opus")
        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all")
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all_orig")
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_suburban_orig")
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban")
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban_orig")

        ## note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
        ## note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999

        ## 3. select (uncomment) one of the following land cover data (input data) date pairs (years)
        # years = [1991, 1995]
        years = [1995, 1999]
        # years = [1999, 2002]

        def open_land_cover(directory, year):
            # One flt-backed dataset per <directory>/<year> folder.
            return LandCoverDataset(
                in_storage=StorageFactory().get_storage(
                    "flt_storage",
                    storage_location=os.path.join(directory, str(year))),
                resources=Resources({"lowercase": 1}))

        # Sampled datasets used for estimation.
        self.lc1 = open_land_cover(flt_directory_est, years[0])
        self.lc2 = open_land_cover(flt_directory_est, years[1])

        # Full-extent datasets used for calibration; flushed right after
        # construction.
        self.lc1_all = open_land_cover(flt_directory, years[0])
        self.lc1_all.flush_dataset()
        self.lc2_all = open_land_cover(flt_directory, years[1])
        self.lc2_all.flush_dataset()

    def estimate(self, spec_py=None, spec_var=None, spec_file=None):
        """Estimate on the sample, calibrate on the full extent, save results.

        The specification is taken from the first of these that is given:

        spec_py   -- module exposing a ``specification`` attribute; the module
                     is reloaded first and its content replaces ``spec_var``.
        spec_var  -- specification dictionary, see
                     ``load_specification_from_variable``.
        spec_file -- name of a specification table in ``self.storage``.

        The results are written to ``self.storage`` as
        ``lccm_specification_<suffix>c`` and ``lccm_coefficients_<suffix>c``.
        ``<suffix>`` is the name of ``spec_py`` without its first 30
        characters, or an empty string when no ``spec_py`` module is given.
        """
        t1 = time()
        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification
        if spec_var is not None:
            # Only the specification object is needed here.
            self.specification = self.load_specification_from_variable(spec_var)[0]
        elif spec_file is not None:
            self.specification = EquationSpecification(in_storage=self.storage)
            self.specification.load(in_table_name=spec_file)

        self.specification.set_dataset_name_of_variables("land_cover")

        self.model_name = "land_cover_change_model"
        choices = range(1, 15)
        lccm = LandCoverChangeModel(choices, submodel_string="lct")

        ## 4. select (uncomment) one of the following subsetted sampling files (agents_index)
        # agents_index = where(self.lc1.get_attribute("sall_91_95_0"))[0]
        agents_index = where(self.lc1.get_attribute("sall_95_99_0"))[0]
        # agents_index = where(self.lc1.get_attribute("sall_99_02_0b"))[0]
        # agents_index = where(self.lc1.get_attribute("sa9902_9195_0"))[0]
        # agents_index = where(self.lc1.get_attribute("sa9902_9599_0"))[0]
        # agents_index = where(self.lc1.get_attribute("suburb91_95sample0"))[0]
        # agents_index = where(self.lc1.get_attribute("suburb95_99sample0"))[0]
        # agents_index = where(self.lc1.get_attribute("up91x95_old_samp0"))[0]
        # agents_index = where(self.lc1.get_attribute("urbsamp95_99_0"))[0]

        ## need to include agents_index_all separately for the calibration portion
        ##    when using the dataset at the full extent, agents_index_all is needed as it is
        ##    created from the lc1_all agents_set and matches the size of the input data

        ## 5. select (uncomment) one of the following sampling files (agents_index) at full spatial extent
        # agents_index_all = where(self.lc1_all.get_attribute("sall_91_95_0"))[0]
        agents_index_all = where(self.lc1_all.get_attribute("sall_95_99_0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("sall_99_02_0b"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("sa9902_9195_0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("sa9902_9599_0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("suburb91_95sample0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("suburb95_99sample0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("up91x95_old_samp0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("urbsamp95_99_0"))[0]

        coef, results = lccm.estimate(self.specification, self.lc1, self.lc2,
                                      agents_index=agents_index, debuglevel=4)
        new_coef = lccm.calibrate(self.lc1_all, self.lc2_all, agents_index_all)
        specification = lccm.specification

        # Save estimation results.  Without a spec_py module there is no module
        # name to derive the suffix from; fall back to an empty suffix instead
        # of failing after the estimation has already been done.
        # out_suffix = spec_py.__name__[len(spec_py.__name__) - 11:]
        out_suffix = spec_py.__name__[30:] if spec_py is not None else ""
        specification.write(out_storage=self.storage,
                            out_table_name='lccm_specification_%sc' % out_suffix)
        new_coef.write(out_storage=self.storage,
                       out_table_name='lccm_coefficients_%sc' % out_suffix)

        logger.log_status("Estimation done. %s s" % str(time() - t1))

    def load_specification_from_variable(self, spec_var):
        """Convert a specification dictionary into an EquationSpecification.

        ``spec_var`` maps each submodel to a dictionary of
        ``variable name -> sequence of coefficients`` (one per equation).  A
        submodel dictionary may also hold an ``"equation_ids"`` entry listing
        the equation id of each coefficient position; when it is missing or
        empty, ids ``1..N`` are used, with N taken from the first variable
        encountered.  Coefficients equal to 0 are left out.  ``spec_var`` is
        not modified.

        Returns the tuple ``(specification, variables, coefficients,
        equations, submodels)``; the four lists are parallel.

        Raises ValueError when ``spec_var`` does not have this shape.
        """
        variables = []
        coefficients = []
        equations = []
        submodels = []
        try:
            for sub_model, submodel_spec in spec_var.items():
                if not isinstance(submodel_spec, dict):
                    raise ValueError("Wrong specification format")
                # Equation ids come from spec.py; they end up in `equations`
                # and are passed on to the equation specification.
                equation_ids = submodel_spec.get("equation_ids")
                for var, coefs in submodel_spec.items():
                    if var == "equation_ids":
                        # Metadata entry, not a variable.
                        continue
                    if not equation_ids:
                        equation_ids = range(1, len(coefs) + 1)
                    for i, coef in enumerate(coefs):
                        if coef != 0:
                            variables.append(var)
                            coefficients.append(coef)
                            equations.append(equation_ids[i])
                            submodels.append(sub_model)
        except Exception:
            # Any structural problem is reported uniformly as a ValueError.
            raise ValueError("Wrong specification format for submodel variable.")

        specification = EquationSpecification(variables=variables,
                                              coefficients=coefficients,
                                              equations=equations,
                                              submodels=submodels)
        return (specification, variables, coefficients, equations, submodels)
Exemple #19
0
# Opus/UrbanSim urban simulation software.
# Copyright (C) 2010-2011 University of California, Berkeley, 2005-2009 University of Washington
# See opus_core/LICENSE


from biocomplexity.datasets.land_cover_dataset import LandCoverDataset
from biocomplexity.opus_package_info import package
from opus_core.storage_factory import StorageFactory
import os

# Directory that contains the biocomplexity package.
parent_dir_path = package().get_package_parent_path()

# 1991 slice of the small LCCM test set.
flt_directory = os.path.join(
    parent_dir_path,
    *("biocomplexity", "data", "LCCM_small_test_set_opus", "1991"))

if __name__ == "__main__":
    # Summarise the dataset, then print and plot every attribute in turn.
    flt_storage = StorageFactory().get_storage("flt_storage",
                                               storage_location=flt_directory)
    lc = LandCoverDataset(in_storage=flt_storage)
    lc.summary()
    for attribute_name in lc.get_attribute_names():
        print(attribute_name)
        lc.plot_map(attribute_name, main=attribute_name)
    def _convert_lccm_input(self, flt_directory_in, flt_directory_out):
        gc.collect()
        t1 = time()
        lc =  LandCoverDataset(in_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_in), 
            out_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_out))
#        lc.get_header() # added 23 june 2009 by mm
        mask = lc.get_mask()
        idx = where(mask==0)[0]
        lcsubset = DatasetSubset(lc, idx)
        print "Converting:"
        lcsubset.write_dataset(attributes=["relative_x"], out_table_name="land_covers")
        lc.delete_one_attribute("relative_x")
        lcsubset.write_dataset(attributes=["relative_y"], out_table_name="land_covers")
        lc.delete_one_attribute("relative_y")
        lc.flush_dataset()
        gc.collect()
#        lc_names = lc.get_primary_attribute_names()
        for attr in lc.get_primary_attribute_names():
            print "   ", attr
            lcsubset.write_dataset(attributes=[attr], out_table_name="land_covers")
            lc.delete_one_attribute(attr)
        logger.log_status("Data conversion done. " + str(time()-t1) + " s")
 def _get_max_index(self, land_cover_path):
     """Return ``size()`` of the flt land cover dataset at ``land_cover_path``."""
     flt_storage = StorageFactory().get_storage("flt_storage",
                                                storage_location=land_cover_path)
     return LandCoverDataset(in_storage=flt_storage).size()
    def run(self, base_directory, urbansim_cache_directory, years, output_directory, temp_folder,
            coefficients_name, specification_name, convert_flt=True, convert_input=False):
        """Run the land cover change simulation for every year in ``years``.

        For each year the input land cover is generated, then processed in
        windows of at most ``LccmConfiguration.offset`` records: the model is
        run on the window, the dataset is flushed and the output flt files are
        generated.

        Parameters:
            base_directory: directory containing all year folders of lccm.
            urbansim_cache_directory: directory containing all year folders of
                the urbansim cache; also set as the simulation cache directory.
            years: list of years to run.
            output_directory, convert_flt, convert_input: passed on to
                ``_generate_input_land_cover`` / ``_generate_output_flt2``.
            temp_folder: stored as ``self.temp_land_cover_dir``; when None a
                new temporary directory is created instead.
                NOTE(review): the directory is removed at the end of the run
                even when it was supplied by the caller.
            coefficients_name, specification_name: names of the tables loaded
                from the tab storage at ``<self.package_path>/data``.
        """
        model = LandCoverChangeModel(self.possible_lcts, submodel_string=self.lct_attribute, 
                                     choice_attribute_name=self.lct_attribute, debuglevel=4)
        # Coefficients and specification both come from the package's data folder.
        coefficients = Coefficients()
        storage = StorageFactory().get_storage('tab_storage', 
            storage_location=os.path.join(self.package_path, 'data'))
        coefficients.load(in_storage=storage, in_table_name=coefficients_name)
        specification = EquationSpecification(in_storage=storage)
        specification.load(in_table_name=specification_name)
        specification.set_variable_prefix("biocomplexity.land_cover.")
        constants = Constants()
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(urbansim_cache_directory)
        # attribute_cache is used for the gridcells below; the session
        # configuration is given its own AttributeCache instance.
        attribute_cache = AttributeCache()
        SessionConfiguration(new_instance=True,
                             package_order=['biocomplexity', 'urbansim', 'opus_core'],
                             in_storage=AttributeCache())
                
        ncols = LccmConfiguration.ncols        
        
        if temp_folder is None:
            self.temp_land_cover_dir = tempfile.mkdtemp()
        else:
            self.temp_land_cover_dir = temp_folder
        
        for year in years:
            land_cover_path = self._generate_input_land_cover(year, base_directory, urbansim_cache_directory, 
                                                              years, output_directory, convert_flt, convert_input)
            #max_size = 174338406 (orig) - act. int: 19019944 (37632028 incl NoData)
            max_size = self._get_max_index(land_cover_path) # 1st instance of lc_dataset - but looks like a 'lite' version
            # [s, t) is the current window of record indices.
            offset = min(LccmConfiguration.offset, max_size)
            s = 0
            t = offset
            while (s < t and t <= max_size):
                logger.log_status("Offset: ", s, t)
                index = arange(s,t)
                
                land_cover_cache_path=os.path.join(urbansim_cache_directory,str(year),'land_covers')
                self._clean_up_land_cover_cache(land_cover_cache_path)
                
                simulation_state.set_current_time(year)
                
                # 2nd instance of lc_dataset
                # Input and output storage point at the same location.
                land_covers = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path),
                                           out_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path),
                                           debuglevel=4)
                land_covers.subset_by_index(index)
#                land_covers.load_dataset()
                gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4)

                # NOTE(review): agents_index is assigned but not used below.
                agents_index = None
                model.run(specification, coefficients, land_covers, data_objects={"gridcell":gridcells,
                              "constants":constants, "flush_variables":True},
                              chunk_specification = {'nchunks':5}) ## chunk size set here
                land_covers.flush_dataset()
                # Drop the datasets before the output conversion step.
                del gridcells
                del land_covers

#                self._generate_output_flt(year, urbansim_cache_directory, output_directory, convert_flt)
                self._generate_output_flt2(year, urbansim_cache_directory, output_directory, convert_flt)
                
                if t >= max_size: break
                # Advance the window; the next one starts 10*ncols records
                # before the end of this one, so consecutive windows overlap.
                # NOTE(review): presumably so cells near a window edge still
                # see their neighbours - confirm.
                s = max(t-10*ncols,s)
                t = min(t+offset-10*ncols,max_size)
                
        # clean up temp storage after done simulation
        shutil.rmtree(self.temp_land_cover_dir)
Exemple #23
0
class LCCMEstimator(Estimator):
    """Estimate and calibrate the land cover change model (LCCM).

    The constructor opens the sampled and the full-extent land cover datasets
    for a hard-coded pair of years.  ``estimate`` fits the model on the sample,
    calibrates it on the full extent and writes the resulting specification
    and coefficients to ``self.storage``.
    """

    def __init__(self, **kargs):
        """Open the tab storage and the four land cover datasets.

        ``kargs`` is accepted but not used, and ``Estimator.__init__`` is not
        called (see the commented-out calls below).
        """
        # Estimator.__init__(self, settings=None, run_land_price_model_before_estimation=False, **kargs) # <-- old __init__
        # Estimator.__init__(self, config=None, save_estimation_results=True) # <-- new __init__ doesn't work, but not needed

        parent_dir_path = package().get_package_parent_path()
        package_path = OpusPackage().get_path_for_package("biocomplexity")
        self.storage = StorageFactory().get_storage(
            'tab_storage', storage_location=os.path.join(package_path, 'data'))

        ## 1. directory path of full (4 county spatial extent) dataset
        flt_directory = os.path.join(parent_dir_path, "biocomplexity", "data",
                                     "LCCM_4County")

        ## 2. select (uncomment) one of the following directory paths of subsetted sample input data/variables
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_small_test_set_opus")
        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity",
                                         "data", "data_for_estimation_all")
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all_orig")
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_suburban_orig")
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban")
        # flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban_orig")

        ## note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
        ## note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999

        ## 3. select (uncomment) one of the following land cover data (input data) date pairs (years)
        # years = [1991, 1995]
        years = [1995, 1999]
        # years = [1999, 2002]

        def open_land_cover(directory, year):
            # One flt-backed dataset per <directory>/<year> folder.
            return LandCoverDataset(
                in_storage=StorageFactory().get_storage(
                    "flt_storage",
                    storage_location=os.path.join(directory, str(year))),
                resources=Resources({"lowercase": 1}))

        # Sampled datasets used for estimation.
        self.lc1 = open_land_cover(flt_directory_est, years[0])
        self.lc2 = open_land_cover(flt_directory_est, years[1])

        # Full-extent datasets used for calibration; flushed right after
        # construction.
        self.lc1_all = open_land_cover(flt_directory, years[0])
        self.lc1_all.flush_dataset()
        self.lc2_all = open_land_cover(flt_directory, years[1])
        self.lc2_all.flush_dataset()

    def estimate(self, spec_py=None, spec_var=None, spec_file=None):
        """Estimate on the sample, calibrate on the full extent, save results.

        The specification is taken from the first of these that is given:

        spec_py   -- module exposing a ``specification`` attribute; the module
                     is reloaded first and its content replaces ``spec_var``.
        spec_var  -- specification dictionary, see
                     ``load_specification_from_variable``.
        spec_file -- name of a specification table in ``self.storage``.

        The results are written to ``self.storage`` as
        ``lccm_specification_<suffix>c`` and ``lccm_coefficients_<suffix>c``.
        ``<suffix>`` is the name of ``spec_py`` without its first 30
        characters, or an empty string when no ``spec_py`` module is given.
        """
        t1 = time()
        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification
        if spec_var is not None:
            # Only the specification object is needed here.
            self.specification = self.load_specification_from_variable(
                spec_var)[0]
        elif spec_file is not None:
            self.specification = EquationSpecification(in_storage=self.storage)
            self.specification.load(in_table_name=spec_file)

        self.specification.set_dataset_name_of_variables("land_cover")

        self.model_name = "land_cover_change_model"
        choices = range(1, 15)
        lccm = LandCoverChangeModel(choices, submodel_string="lct")

        ## 4. select (uncomment) one of the following subsetted sampling files (agents_index)
        # agents_index = where(self.lc1.get_attribute("sall_91_95_0"))[0]
        agents_index = where(self.lc1.get_attribute("sall_95_99_0"))[0]
        # agents_index = where(self.lc1.get_attribute("sall_99_02_0b"))[0]
        # agents_index = where(self.lc1.get_attribute("sa9902_9195_0"))[0]
        # agents_index = where(self.lc1.get_attribute("sa9902_9599_0"))[0]
        # agents_index = where(self.lc1.get_attribute("suburb91_95sample0"))[0]
        # agents_index = where(self.lc1.get_attribute("suburb95_99sample0"))[0]
        # agents_index = where(self.lc1.get_attribute("up91x95_old_samp0"))[0]
        # agents_index = where(self.lc1.get_attribute("urbsamp95_99_0"))[0]

        ## need to include agents_index_all separately for the calibration portion
        ##    when using the dataset at the full extent, agents_index_all is needed as it is
        ##    created from the lc1_all agents_set and matches the size of the input data

        ## 5. select (uncomment) one of the following sampling files (agents_index) at full spatial extent
        # agents_index_all = where(self.lc1_all.get_attribute("sall_91_95_0"))[0]
        agents_index_all = where(self.lc1_all.get_attribute("sall_95_99_0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("sall_99_02_0b"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("sa9902_9195_0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("sa9902_9599_0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("suburb91_95sample0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("suburb95_99sample0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("up91x95_old_samp0"))[0]
        # agents_index_all = where(self.lc1_all.get_attribute("urbsamp95_99_0"))[0]

        coef, results = lccm.estimate(self.specification,
                                      self.lc1,
                                      self.lc2,
                                      agents_index=agents_index,
                                      debuglevel=4)
        new_coef = lccm.calibrate(self.lc1_all, self.lc2_all, agents_index_all)
        specification = lccm.specification

        # Save estimation results.  Without a spec_py module there is no module
        # name to derive the suffix from; fall back to an empty suffix instead
        # of failing after the estimation has already been done.
        # out_suffix = spec_py.__name__[len(spec_py.__name__) - 11:]
        out_suffix = spec_py.__name__[30:] if spec_py is not None else ""
        specification.write(out_storage=self.storage,
                            out_table_name='lccm_specification_%sc' %
                            out_suffix)
        new_coef.write(out_storage=self.storage,
                       out_table_name='lccm_coefficients_%sc' % out_suffix)

        logger.log_status("Estimation done. %s s" % str(time() - t1))

    def load_specification_from_variable(self, spec_var):
        """Convert a specification dictionary into an EquationSpecification.

        ``spec_var`` maps each submodel to a dictionary of
        ``variable name -> sequence of coefficients`` (one per equation).  A
        submodel dictionary may also hold an ``"equation_ids"`` entry listing
        the equation id of each coefficient position; when it is missing or
        empty, ids ``1..N`` are used, with N taken from the first variable
        encountered.  Coefficients equal to 0 are left out.  ``spec_var`` is
        not modified.

        Returns the tuple ``(specification, variables, coefficients,
        equations, submodels)``; the four lists are parallel.

        Raises ValueError when ``spec_var`` does not have this shape.
        """
        variables = []
        coefficients = []
        equations = []
        submodels = []
        try:
            for sub_model, submodel_spec in spec_var.items():
                if not isinstance(submodel_spec, dict):
                    raise ValueError("Wrong specification format")
                # Equation ids come from spec.py; they end up in `equations`
                # and are passed on to the equation specification.
                equation_ids = submodel_spec.get("equation_ids")
                for var, coefs in submodel_spec.items():
                    if var == "equation_ids":
                        # Metadata entry, not a variable.
                        continue
                    if not equation_ids:
                        equation_ids = range(1, len(coefs) + 1)
                    for i, coef in enumerate(coefs):
                        if coef != 0:
                            variables.append(var)
                            coefficients.append(coef)
                            equations.append(equation_ids[i])
                            submodels.append(sub_model)
        except Exception:
            # Any structural problem is reported uniformly as a ValueError.
            raise ValueError(
                "Wrong specification format for submodel variable.")

        specification = EquationSpecification(variables=variables,
                                              coefficients=coefficients,
                                              equations=equations,
                                              submodels=submodels)
        return (specification, variables, coefficients, equations, submodels)
Exemple #24
0
     current_year = sys.argv[1]
     
     test_flag = options.test_flag        
 
     flt_directory_in = options.input
     flt_directory_out = options.output
 
     logger.log_status("Convert output data for ", str(current_year))
 
 #todo: how to get 'land_covers' from dataset?
 flt_directory_out = os.path.join(flt_directory_out, 'land_covers')    
 
 if not os.path.exists(flt_directory_out):
     os.makedirs(flt_directory_out)
 
 lc =  LandCoverDataset(in_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_in))
 relative_x = lc.get_attribute("relative_x")
 relative_y = lc.get_attribute("relative_y")
 flat_indices = relative_x * ncols * 1.0 + relative_y
 
 if flat_indices[5*ncols:] is None or len(flat_indices[5*ncols:]) == 0:
     offset = 0
 else:
     offset = 5*ncols
 
 #if os.path.exists("indices.lf4"):
 #    os.remove("indices.lf4")
 #flat_indices.tofile("indices.lf4")
 
 logger.start_block("Converting")
 try:    
Exemple #25
0
    def get_resources(self, data_dictionary, dataset):
        """Assemble the Resources needed to compute a variable.

        Entries of ``data_dictionary`` whose key is listed in ``self.datasets``
        are treated as attribute tables: a missing id column is filled with
        1..N, and the "land_cover" and "gridcell" tables are turned into
        datasets backed by dict storage.  All other entries are merged as they
        are.  Unless ``dataset`` is one of ``self.interactions``, the entry
        stored under that name is also merged as "dataset".
        """
        resources = Resources()
        for key in data_dictionary.keys():
            if key not in self.datasets:
                # Plain value, passed through untouched.
                resources.merge({key: data_dictionary[key]})
                continue

            data = data_dictionary[key]
            id_name = self.id_names[key]
            if id_name not in data.keys() and not isinstance(id_name, list):
                # add id array, as long as the first column of the table
                n_rows = len(data[data.keys()[0]])
                data[id_name] = arange(1, n_rows + 1)

            if key == "land_cover":
                table_name = 'land_cover'
                storage = StorageFactory().get_storage('dict_storage')
                storage.write_table(table_name=table_name, table_data=data)
                lc = LandCoverDataset(in_storage=storage,
                                      in_table_name=table_name)

                # add relative_x and relative_y where the table lacks them,
                # laying the cells out row by row on an n x n grid
                lc.get_id_attribute()
                n = int(ceil(sqrt(lc.size())))
                for attribute_name, axis in (("relative_x", 1),
                                             ("relative_y", 0)):
                    if attribute_name not in data.keys():
                        coordinates = (indices((n, n)) + 1)[axis].ravel()
                        lc.add_attribute(coordinates[0:lc.size()],
                                         attribute_name,
                                         metadata=1)

                resources.merge({key: lc})
            elif key == "gridcell":
                table_name = 'gridcell'
                storage = StorageFactory().get_storage('dict_storage')
                storage.write_table(table_name=table_name, table_data=data)
                gridcell_dataset = GridcellDataset(in_storage=storage,
                                                   in_table_name=table_name)

                resources.merge({key: gridcell_dataset})

        if dataset not in self.interactions:
            resources.merge({"dataset": resources[dataset]})
        resources.merge({"check_variables": '*', "debug": 4})
        return resources
Exemple #26
0
 def do_test_on_expected_data(self, input_variables_list, input_resources=None, 
                              element_atol=None, sum_atol=None):
     """Compute ``self.variable_name`` and compare it with the stored values.

     The expected values are read from the 1995 slice of the small test set.
     The variable is computed on a temporary copy of the input data made by
     ``make_input_data`` for ``input_variables_list``.  When the two differ by
     more than ``element_atol`` (default 1e-8), diagnostics are logged before
     the assertion fails.  ``sum_atol`` (default 1e-8) only appears in the
     log output.  The temporary directory is removed in all cases.
     """
     from biocomplexity.datasets.land_cover_dataset import LandCoverDataset
     from biocomplexity.tests.utils.land_cover_tests_utils import make_input_data

     import os

     flt_directory = os.path.join(package().get_package_path(),
                                  "data", "small_test_set_opus", "1995")
     #flt_directory = r"C:\eclipse\LCCM_small_test_set_converted\1991"
     expected_storage = StorageFactory().get_storage('flt_storage',
                                                     storage_location=flt_directory)
     expected_lc = LandCoverDataset(in_storage=expected_storage)
     expected_lc.load_dataset()

     temp_dir = make_input_data(flt_directory, input_variables_list)
     try:
         input_storage = StorageFactory().get_storage('flt_storage',
                                                      storage_location=temp_dir)
         output_storage = StorageFactory().get_storage('flt_storage',
                                                       storage_location=r"c:/tmp")
         lc = LandCoverDataset(in_storage=input_storage, out_storage=output_storage)
         lc.load_dataset()

         pool_storage = StorageFactory().get_storage('flt_storage',
                                                     storage_location=temp_dir)
         dataset_pool = DatasetPool(package_order=['biocomplexity'],
                                    storage=pool_storage)
         dataset_pool._add_dataset('land_cover', lc)
         lc.compute_variables(self.variable_name, resources=input_resources,
                              dataset_pool=dataset_pool)

         #lc.write_dataset(attributes='*')

         lc_values = lc.get_attribute(self.variable_name)
         expected_values = expected_lc.get_attribute(self.variable_name)

         sum_atol = 1e-8 if sum_atol is None else sum_atol
         element_atol = 1e-8 if element_atol is None else element_atol

         if not ma.allclose(lc_values, expected_values, atol=element_atol):
             # Log enough detail to see how the two arrays differ.
             logger.log_status("comparision using element-atol=%f, sum-atol=%f" % (element_atol, sum_atol))
             logger.log_status("      computed      expected")
             logger.log_status("sum: ", lc_values.sum(), expected_values.sum())
             logger.log_status("max: ", max(lc_values), max(expected_values))
             logger.log_status("min: ", min(lc_values), min(expected_values))

             pairs = list(zip(lc_values, expected_values))
             nonzero_computed = sum(1 for computed, _ in pairs if computed != 0)
             nonzero_expected = sum(1 for _, expected in pairs if expected != 0)

             logger.log_status("# non-zeros values: ", nonzero_computed, nonzero_expected)
             logger.log_status("max distance between 2 elements: %f" %
                               self._max_distance_between_two_elements(lc_values, expected_values))
             logger.log_status(lc_values, expected_values)

             count = sum(1 for computed, expected in pairs if computed != expected)
             total = len(pairs)
             logger.log_status("# different elements = %d, over %d, with a %f percentage" \
                             % (count, total, count*1.0/total))

         #self.assert_(ma.allclose(lc_values.sum(), expected_values.sum(), atol=sum_atol))
         self.assert_(ma.allclose(lc_values, expected_values, atol=element_atol))
     finally:
         if os.path.exists(temp_dir):
             rmtree(temp_dir)
        
        # Fragment of a land-cover conversion script. The enclosing definition
        # and the code that builds `options` are outside this excerpt.
        # The first command-line argument is taken as the input year; in the
        # visible part it is only used in the status message below.
        input_year = sys.argv[1]   
    
        # Input and output flt directories come from the parsed options.
        flt_directory_in = options.input
        flt_directory_out = options.output
        
        print flt_directory_out
        
        # NOTE(review): test_flag is not used in the visible part of the
        # script -- confirm whether later code reads it.
        test_flag = options.test_flag
        
#        shutil.rmtree(flt_directory_out)
#        os.mkdir(flt_directory_out)
        
        logger.log_status("Convert input data from ", str(input_year))
    
    # NOTE(review): indentation drops from 8 to 4 columns here, which matches
    # no enclosing level visible in this excerpt -- verify against the
    # original file before running.
    # Build the land-cover dataset with flt storage at flt_directory_in as
    # input and flt storage at flt_directory_out as output.
    lc =  LandCoverDataset(in_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_in), 
        out_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_out))
    
    lc.get_header() # added 23 june 2009 by mm
    # Keep only the entries whose mask value equals 0 (presumably the
    # unmasked cells -- confirm against LandCoverDataset.get_mask).
    mask = lc.get_mask()
    idx = where(mask==0)[0]
    lcsubset = DatasetSubset(lc, idx)
    print "Converting:"
    # Each coordinate attribute is written through the subset to the
    # "land_covers" table and then removed from the full dataset
    # (presumably to limit memory use -- TODO confirm).
    lcsubset.write_dataset(attributes=["relative_x"], out_table_name="land_covers")
    #lcsubset.write_dataset(attributes=["relative_x"], out_table_name="land_covers",
    #                            valuetypes=valuetypes)
    lc.delete_one_attribute("relative_x")
    lcsubset.write_dataset(attributes=["relative_y"], out_table_name="land_covers")
    #lcsubset.write_dataset(attributes=["relative_y"], out_table_name="land_covers",
    #                            valuetypes=valuetypes)
    lc.delete_one_attribute("relative_y")
#    srcdir = os.path.join(flt_directory_out, "land_covers", "computed")
Exemple #28
0
 def _convert_lccm_input(self, flt_directory_in, flt_directory_out):
     gc.collect()
     t1 = time()
     lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
         'flt_storage', storage_location=flt_directory_in),
                           out_storage=StorageFactory().get_storage(
                               'flt_storage',
                               storage_location=flt_directory_out))
     #        lc.get_header() # added 23 june 2009 by mm
     mask = lc.get_mask()
     idx = where(mask == 0)[0]
     lcsubset = DatasetSubset(lc, idx)
     print "Converting:"
     lcsubset.write_dataset(attributes=["relative_x"],
                            out_table_name="land_covers")
     lc.delete_one_attribute("relative_x")
     lcsubset.write_dataset(attributes=["relative_y"],
                            out_table_name="land_covers")
     lc.delete_one_attribute("relative_y")
     lc.flush_dataset()
     gc.collect()
     #        lc_names = lc.get_primary_attribute_names()
     for attr in lc.get_primary_attribute_names():
         print "   ", attr
         lcsubset.write_dataset(attributes=[attr],
                                out_table_name="land_covers")
         lc.delete_one_attribute(attr)
     logger.log_status("Data conversion done. " + str(time() - t1) + " s")