Example #1
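    # Estimation setup: points tab storage at the biocomplexity data directory and builds
    # LandCoverDatasets for a selected pair of years, both from the sampled estimation
    # data and from the full 4-county extent (the latter flushed to disk immediately).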
    def __init__(self, **kargs):
        #        Estimator.__init__(self, settings=None, run_land_price_model_before_estimation=False, **kargs) # <-- old __init__
        #        Estimator.__init__(self, config=None, save_estimation_results=True) # <-- new __init__ doesn't work, but not needed

        parent_dir_path = package().get_package_parent_path()
        package_path = OpusPackage().get_path_for_package("biocomplexity")
        self.storage = StorageFactory().get_storage(
            'tab_storage', storage_location=os.path.join(package_path, 'data'))

        ## 1. directory path of full (4 county spatial extent) dataset
        flt_directory = os.path.join(parent_dir_path, "biocomplexity", "data",
                                     "LCCM_4County")

        ## 2. select (uncomment) one of the following choices of directory paths of subsetted sample input data/variables
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "LCCM_small_test_set_opus")
        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity",
                                         "data", "data_for_estimation_all")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_estimation_all_orig")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_suburban_orig")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban")
        #        flt_directory_est = os.path.join(parent_dir_path, "biocomplexity", "data", "data_for_urban_orig")

        ## note - must rename lct-forusewith91sample.Float32 to lct.lf4 if doing 1991-1995
        ## note - must rename lct-forusewith95sample.Float32 to lct.lf4 if doing 1995-1999

        ## 3. select (uncomment) one of the following choices of land cover data (input data) date pairs (years)
        #        years = [1991, 1995]
        years = [1995, 1999]
        #        years = [1999, 2002]

        self.lc1 = LandCoverDataset(in_storage=StorageFactory().get_storage(
            "flt_storage",
            storage_location=os.path.join(flt_directory_est, str(years[0]))),
                                    resources=Resources({"lowercase": 1}))
        self.lc2 = LandCoverDataset(in_storage=StorageFactory().get_storage(
            "flt_storage",
            storage_location=os.path.join(flt_directory_est, str(years[1]))),
                                    resources=Resources({"lowercase": 1}))

        self.lc1_all = LandCoverDataset(
            in_storage=StorageFactory().get_storage(
                "flt_storage",
                storage_location=os.path.join(flt_directory, str(years[0]))),
            resources=Resources({"lowercase": 1}))
        self.lc1_all.flush_dataset()
        self.lc2_all = LandCoverDataset(
            in_storage=StorageFactory().get_storage(
                "flt_storage",
                storage_location=os.path.join(flt_directory, str(years[1]))),
            resources=Resources({"lowercase": 1}))
        self.lc2_all.flush_dataset()
Example #2
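 # Converts LCCM input data between flt storages: keeps only cells whose mask value is 0
 # and writes the subset one attribute at a time, deleting each attribute from the source
 # dataset after it is written to keep memory use low.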
 def _convert_lccm_input(self, flt_directory_in, flt_directory_out):
     gc.collect()
     t1 = time()
     lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
         'flt_storage', storage_location=flt_directory_in),
                           out_storage=StorageFactory().get_storage(
                               'flt_storage',
                               storage_location=flt_directory_out))
     #        lc.get_header() # added 23 june 2009 by mm
     mask = lc.get_mask()
     idx = where(mask == 0)[0]
     lcsubset = DatasetSubset(lc, idx)
     print "Converting:"
     lcsubset.write_dataset(attributes=["relative_x"],
                            out_table_name="land_covers")
     lc.delete_one_attribute("relative_x")
     lcsubset.write_dataset(attributes=["relative_y"],
                            out_table_name="land_covers")
     lc.delete_one_attribute("relative_y")
     lc.flush_dataset()
     gc.collect()
     #        lc_names = lc.get_primary_attribute_names()
     for attr in lc.get_primary_attribute_names():
         print "   ", attr
         lcsubset.write_dataset(attributes=[attr],
                                out_table_name="land_covers")
         lc.delete_one_attribute(attr)
     logger.log_status("Data conversion done. " + str(time() - t1) + " s")
 def _copy_invariants_to_temp_land_cover_dir(self, land_cover_path):
     logger.log_status("temp input land cover data in " + self.temp_land_cover_dir)
     land_covers = LandCoverDataset(in_storage=StorageFactory().get_storage("flt_storage", storage_location=land_cover_path),
                                out_storage=StorageFactory().get_storage("flt_storage", storage_location=self.temp_land_cover_dir),
                                out_table_name='land_covers', debuglevel=4)
     logger.log_status("Land cover dataset created.... ") # added dec 4, 2009
     land_covers.flush_dataset() # added dec 4, 2009
     land_covers.write_dataset(attributes=AttributeType.PRIMARY)
Example #4
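    # Simulation driver: loads the change-model coefficients and specification from tab
    # storage, then for each year runs LandCoverChangeModel on a LandCoverDataset read
    # from flt storage and flushes the results back.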
    def run(self, base_directory, urbansim_cache_directory, years):
        """ run the simulation
                base_directory: directory contains all years folder of lccm.
                urbansim_cache_directory: directory contains all years folder of urbansim cache.
                years: lists of year to run."""
        model = LandCoverChangeModel(self.possible_lcts,
                                     submodel_string=self.lct_attribute,
                                     choice_attribute_name=self.lct_attribute,
                                     debuglevel=4)
        coefficients = Coefficients()
        storage = StorageFactory().get_storage('tab_storage',
                                               storage_location=os.path.join(
                                                   self.package_path, 'data'))
        coefficients.load(in_storage=storage,
                          in_table_name="land_cover_change_model_coefficients")
        specification = EquationSpecification(in_storage=storage)
        specification.load(
            in_table_name="land_cover_change_model_specification")
        specification.set_variable_prefix("biocomplexity.land_cover.")
        constants = Constants()
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(urbansim_cache_directory)
        attribute_cache = AttributeCache()
        index = arange(100000)
        for year in years:
            simulation_state.set_current_time(year)
            #land_cover_path = os.path.join(base_directory, str(year))
            land_cover_path = base_directory
            land_covers = LandCoverDataset(
                in_storage=StorageFactory().get_storage(
                    'flt_storage', storage_location=land_cover_path),
                out_storage=StorageFactory().get_storage(
                    'flt_storage', storage_location=land_cover_path),
                debuglevel=4)
            land_covers.subset_by_index(index)
            #land_covers.load_dataset()
            gridcells = GridcellDataset(in_storage=attribute_cache,
                                        debuglevel=4)

            agents_index = None
            model.run(specification,
                      coefficients,
                      land_covers,
                      data_objects={
                          "gridcell": gridcells,
                          "constants": constants,
                          "flush_variables": True
                      },
                      chunk_specification={'nchunks': 1})
            land_covers.flush_dataset()
            del gridcells
            del land_covers
Example #5
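 # Computes the configured urbansim output variables on the first year's land cover
 # dataset and writes all attributes back to the same flt storage location.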
 def compute_computed_variables(self, base_directory, urbansim_cache_directory, years):
     
     for year in [years[0]]:
         land_cover_path = os.path.join(base_directory, str(year))
         #print land_cover_path
         land_covers = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path),
                                    out_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path))
         land_covers.load_dataset()
         #print land_covers.get_attribute("devgrid_id")
     
         gridcell_path = os.path.join(urbansim_cache_directory)
         gridcells = GridcellDataset(in_storage=StorageFactory().get_storage('flt_storage', storage_location=gridcell_path))
         gridcells.load_dataset()
         #print gridcells.summary()
     
         ## dataset_pool was not defined here in the original; build one and register the
         ## datasets so the variables can be resolved (the package_order and storage choice
         ## below are assumptions, mirroring the other biocomplexity examples)
         dataset_pool = DatasetPool(package_order=['biocomplexity', 'urbansim', 'opus_core'],
                                    storage=StorageFactory().get_storage('flt_storage',
                                                                         storage_location=land_cover_path))
         dataset_pool._add_dataset('land_cover', land_covers)
         dataset_pool._add_dataset('gridcell', gridcells)
         land_covers.compute_variables(self.land_cover_urbansim_output_variables,
                                       dataset_pool=dataset_pool)
         land_covers.write_dataset(attributes='*')
         #land_covers.flush_dataset()
         del gridcells
         del land_covers
Example #6
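    # Command-line conversion script (excerpt): recreates the output directory, then
    # writes relative_x and relative_y from the input flt storage into the "land_covers"
    # table.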
        
    input_year = sys.argv[1]

    flt_directory_in = options.input
    flt_directory_out = options.output

    print flt_directory_out

    test_flag = options.test_flag

    shutil.rmtree(flt_directory_out)
    os.mkdir(flt_directory_out)

    logger.log_status("Convert input data from ", str(input_year))
    
    lc =  LandCoverDataset(in_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_in), 
        out_storage = StorageFactory().get_storage('flt_storage', storage_location = flt_directory_out), debuglevel=4)
    
    lc.get_header()
    
#    mask = lc.get_mask()
#    idx = where(mask==0)[0]
#    lcsubset = DatasetSubset(lc, idx)
    print "Creating and writing relative_x and relative_y:"
    lc.write_dataset(attributes=["relative_x"], out_table_name="land_covers",
                                valuetypes=valuetypes)
    lc.delete_one_attribute("relative_x")
    lc.write_dataset(attributes=["relative_y"], out_table_name="land_covers",
                                valuetypes=valuetypes)
    lc.delete_one_attribute("relative_y")
    
    print "done."
Example #7
# Opus/UrbanSim urban simulation software.
# Copyright (C) 2005-2009 University of Washington
# See opus_core/LICENSE

from biocomplexity.datasets.land_cover_dataset import LandCoverDataset
from biocomplexity.opus_package_info import package
from opus_core.storage_factory import StorageFactory
import os

parent_dir_path = package().get_package_parent_path()

flt_directory = os.path.join(parent_dir_path, "biocomplexity", "data",
                             "LCCM_small_test_set_opus", "1991")
if __name__ == "__main__":
    lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
        "flt_storage", storage_location=flt_directory))
    lc.summary()
    for attr in lc.get_attribute_names():
        print attr
        lc.plot_map(attr, main=attr)
Example #8
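    # Converts model output back to flt rasters: rebuilds flat cell indices from
    # relative_x/relative_y, writes the lct attribute (with an .hdr header), and then any
    # computed "probs*" attributes into a "computed" subdirectory.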
    def _create_flt_file(self, current_year, flt_directory_in,
                         flt_directory_out):

        logger.log_status("Convert output data for ", str(current_year))

        flt_directory_out = os.path.join(flt_directory_out, 'land_covers')

        if not os.path.exists(flt_directory_out):
            os.makedirs(flt_directory_out)

        lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
            'flt_storage', storage_location=flt_directory_in))
        relative_x = lc.get_attribute("relative_x")
        relative_y = lc.get_attribute("relative_y")
        flat_indices = relative_x * self.ncols * 1.0 + relative_y

        if flat_indices[5 * self.ncols:] is None or len(
                flat_indices[5 * self.ncols:]) == 0:
            offset = 0
        else:
            offset = 5 * self.ncols

        logger.start_block("Converting")
        try:
            for attr_name in lc.get_primary_attribute_names():
                if attr_name not in ["relative_x", "relative_y"]:
                    attr_name = "lct"  #-------------- only output lct now
                    logger.log_status("    ", attr_name)
                    attr = ma.filled(lc.get_attribute(attr_name),
                                     self.nodata_values).astype(float32)
                    self._create_flt_file2(
                        os.path.join(flt_directory_out, attr_name + ".lf4"),
                        attr, flat_indices, offset)
                    self._create_header(
                        os.path.join(flt_directory_out, attr_name +
                                     ".hdr"))  #<-- added 26 may 09 by mm
                    del attr
                    break  #-------------- only output lct now

            lc.load_dataset(attributes='*')
            if lc.get_computed_attribute_names() is not None:
                flt_directory_out = os.path.join(flt_directory_out, "computed")
                if not os.path.exists(flt_directory_out):
                    os.makedirs(flt_directory_out)
                for attr_name in lc.get_computed_attribute_names():
                    if attr_name not in ["_hidden_id_"]:
                        if attr_name[0:5] == "probs":
                            logger.log_status("    ", attr_name)
                            attr = ma.filled(
                                lc.get_attribute(attr_name),
                                self.nodata_values).astype(float32)
                            self._create_flt_file2(
                                os.path.join(flt_directory_out,
                                             attr_name + ".lf4"), attr,
                                flat_indices, offset)
                            self._create_header(
                                os.path.join(
                                    flt_directory_out, attr_name +
                                    ".hdr"))  #<-- added 26 may 09 by mm
                            del attr
        finally:
            #            lc.flush_dataset() # added 23 jun 2009 - not tested...
            logger.end_block()
Example #9
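# Script that selects the cells flagged by an index attribute and writes each primary
# attribute of that subset to the "land_covers" table, one attribute at a time.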
#index_attribute = "sa9902_9599_0"
#index_attribute = "sall_99_02_0v1"
#index_attribute = sys.argv[2]
index_attribute = "lc0207_100k_0"

# 4. Years - date pair of input data; year is concatenated to flt_directory_in specified in #1
#years = [1991, 1995]
#years = [1995, 1999]
#years = [2002]
#years = sys.argv[3]
years = [2007, 2007]

lc1 = LandCoverDataset(in_storage=StorageFactory().get_storage(
    'flt_storage',
    storage_location=os.path.join(flt_directory_in, str(years[0]))),
                       out_storage=StorageFactory().get_storage(
                           'flt_storage',
                           storage_location=os.path.join(
                               flt_directory_out, str(years[0]))))

agents_index = where(lc1.get_attribute(index_attribute))[0]
lc1subset = DatasetSubset(lc1, agents_index)
print "Writing set 1:"
for attr in lc1.get_primary_attribute_names():
    print "   ", attr
    lc1subset.write_dataset(attributes=[attr], out_table_name="land_covers")
    lc1.delete_one_attribute(
        attr
    )  # leaving this line in causes every other input data file to be processed; commenting it out causes a memory error

lc2 = LandCoverDataset(in_storage=StorageFactory().get_storage(
    'flt_storage',
    storage_location=os.path.join(flt_directory_in, str(years[1]))))
# NOTE: the original excerpt is cut off at this point; the storage arguments above are an
# assumed completion mirroring lc1, using years[1].
Example #10
 def _get_max_index(self, land_cover_path):
     land_covers = LandCoverDataset(in_storage=StorageFactory().get_storage("flt_storage", storage_location=land_cover_path))
     return land_covers.size()
Example #11
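    # Full chunked simulation driver: prepares the input land cover data, then for each
    # year runs LandCoverChangeModel over index windows of at most `offset` cells,
    # flushing the dataset and converting the results to flt output after every chunk.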
    def run(self, base_directory, urbansim_cache_directory, years, output_directory, temp_folder,
            coefficients_name, specification_name, convert_flt=True, convert_input=False):
        """ run the simulation
                base_directory: directory contains all years folder of lccm.
                urbansim_cache_directory: directory contains all years folder of urbansim cache.
                years: lists of year to run."""
        model = LandCoverChangeModel(self.possible_lcts, submodel_string=self.lct_attribute, 
                                     choice_attribute_name=self.lct_attribute, debuglevel=4)
        coefficients = Coefficients()
        storage = StorageFactory().get_storage('tab_storage', 
            storage_location=os.path.join(self.package_path, 'data'))
        coefficients.load(in_storage=storage, in_table_name=coefficients_name)
        specification = EquationSpecification(in_storage=storage)
        specification.load(in_table_name=specification_name)
        specification.set_variable_prefix("biocomplexity.land_cover.")
        constants = Constants()
        simulation_state = SimulationState()
        simulation_state.set_cache_directory(urbansim_cache_directory)
        attribute_cache = AttributeCache()
        SessionConfiguration(new_instance=True,
                             package_order=['biocomplexity', 'urbansim', 'opus_core'],
                             in_storage=AttributeCache())
                
        ncols = LccmConfiguration.ncols        
        
        if temp_folder is None:
            self.temp_land_cover_dir = tempfile.mkdtemp()
        else:
            self.temp_land_cover_dir = temp_folder
        
        for year in years:
            land_cover_path = self._generate_input_land_cover(year, base_directory, urbansim_cache_directory, 
                                                              years, output_directory, convert_flt, convert_input)
            #max_size = 174338406 (orig) - act. int: 19019944 (37632028 incl NoData)
            max_size = self._get_max_index(land_cover_path) # 1st instance of lc_dataset - but looks like a 'lite' version
            offset = min(LccmConfiguration.offset, max_size)
            s = 0
            t = offset
            while (s < t and t <= max_size):
                logger.log_status("Offset: ", s, t)
                index = arange(s,t)
                
                land_cover_cache_path=os.path.join(urbansim_cache_directory,str(year),'land_covers')
                self._clean_up_land_cover_cache(land_cover_cache_path)
                
                simulation_state.set_current_time(year)
                
                # 2nd instance of lc_dataset
                land_covers = LandCoverDataset(in_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path),
                                           out_storage=StorageFactory().get_storage('flt_storage', storage_location=land_cover_path),
                                           debuglevel=4)
                land_covers.subset_by_index(index)
#                land_covers.load_dataset()
                gridcells = GridcellDataset(in_storage=attribute_cache, debuglevel=4)

                agents_index = None
                model.run(specification, coefficients, land_covers, data_objects={"gridcell":gridcells,
                              "constants":constants, "flush_variables":True},
                              chunk_specification = {'nchunks':5}) ## chunk size set here
                land_covers.flush_dataset()
                del gridcells
                del land_covers

#                self._generate_output_flt(year, urbansim_cache_directory, output_directory, convert_flt)
                self._generate_output_flt2(year, urbansim_cache_directory, output_directory, convert_flt)
                
                if t >= max_size: break
                s = max(t-10*ncols,s)
                t = min(t+offset-10*ncols,max_size)
                
        # clean up temp storage after done simulation
        shutil.rmtree(self.temp_land_cover_dir)
Example #12
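    # Output-conversion script (excerpt): prepares the land_covers output directory and
    # rebuilds flat cell indices from relative_x/relative_y before the (truncated)
    # conversion block.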
    current_year = sys.argv[1]

    test_flag = options.test_flag

    flt_directory_in = options.input
    flt_directory_out = options.output

    logger.log_status("Convert output data for ", str(current_year))

    #todo: how to get 'land_covers' from dataset?
    flt_directory_out = os.path.join(flt_directory_out, 'land_covers')

    if not os.path.exists(flt_directory_out):
        os.makedirs(flt_directory_out)

    lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
        'flt_storage', storage_location=flt_directory_in))
    relative_x = lc.get_attribute("relative_x")
    relative_y = lc.get_attribute("relative_y")
    flat_indices = relative_x * ncols * 1.0 + relative_y

    if flat_indices[5 * ncols:] is None or len(flat_indices[5 * ncols:]) == 0:
        offset = 0
    else:
        offset = 5 * ncols

    #if os.path.exists("indices.lf4"):
    #    os.remove("indices.lf4")
    #flat_indices.tofile("indices.lf4")

    logger.start_block("Converting")
    try:
Example #13
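    # Test helper: wraps raw dictionaries into LandCoverDataset/GridcellDataset objects
    # (adding id, relative_x and relative_y arrays where missing) and merges them into a
    # Resources object used to compute variables.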
    def get_resources(self, data_dictionary, dataset):
        """Create resources for computing a variable. """
        resources = Resources()
        for key in data_dictionary.keys():
            if key in self.datasets:
                data = data_dictionary[key]
                if self.id_names[key] not in data_dictionary[key].keys(
                ) and not isinstance(self.id_names[key], list):

                    data[self.id_names[key]] = arange(1,\
                        len(data_dictionary[key][data_dictionary[key].keys()[0]])+1) # add id array

                if key == "land_cover":
                    land_cover_storage = StorageFactory().get_storage(
                        'dict_storage')
                    land_cover_table_name = 'land_cover'
                    land_cover_storage.write_table(
                        table_name=land_cover_table_name,
                        table_data=data,
                    )

                    lc = LandCoverDataset(
                        in_storage=land_cover_storage,
                        in_table_name=land_cover_table_name,
                    )

                    # add relative_x and relative_y
                    lc.get_id_attribute()
                    n = int(ceil(sqrt(lc.size())))

                    if "relative_x" not in data.keys():
                        x = (indices((n, n)) + 1)[1].ravel()
                        lc.add_attribute(x[0:lc.size()],
                                         "relative_x",
                                         metadata=1)
                    if "relative_y" not in data.keys():
                        y = (indices((n, n)) + 1)[0].ravel()
                        lc.add_attribute(y[0:lc.size()],
                                         "relative_y",
                                         metadata=1)

                    resources.merge({key: lc})

                if key == "gridcell":
                    gridcell_storage = StorageFactory().get_storage(
                        'dict_storage')
                    gridcell_table_name = 'gridcell'
                    gridcell_storage.write_table(
                        table_name=gridcell_table_name,
                        table_data=data,
                    )

                    gridcell_dataset = GridcellDataset(
                        in_storage=gridcell_storage,
                        in_table_name=gridcell_table_name,
                    )

                    resources.merge({key: gridcell_dataset})
            else:
                resources.merge({key: data_dictionary[key]})

        if dataset not in self.interactions:
            resources.merge({"dataset": resources[dataset]})
        resources.merge({"check_variables": '*', "debug": 4})
        return resources
Example #14
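    # Test helper: computes self.variable_name on a temporary copy of the small test set
    # and compares the result element-wise against the expected 1995 data, logging
    # diagnostics when the arrays differ beyond the given tolerances.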
    def do_test_on_expected_data(self,
                                 input_variables_list,
                                 input_resources=None,
                                 element_atol=None,
                                 sum_atol=None):
        from biocomplexity.datasets.land_cover_dataset import LandCoverDataset
        from biocomplexity.tests.utils.land_cover_tests_utils import make_input_data

        import os

        package_dir_path = package().get_package_path()
        flt_directory = os.path.join(package_dir_path, "data",
                                     "small_test_set_opus", "1995")
        #flt_directory = r"C:\eclipse\LCCM_small_test_set_converted\1991"
        expected_lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
            'flt_storage', storage_location=flt_directory))
        expected_lc.load_dataset()

        temp_dir = make_input_data(flt_directory, input_variables_list)
        try:
            lc = LandCoverDataset(in_storage=StorageFactory().get_storage(
                'flt_storage', storage_location=temp_dir),
                                  out_storage=StorageFactory().get_storage(
                                      'flt_storage',
                                      storage_location=r"c:/tmp"))
            lc.load_dataset()

            dataset_pool = DatasetPool(package_order=['biocomplexity'],
                                       storage=StorageFactory().get_storage(
                                           'flt_storage',
                                           storage_location=temp_dir))
            dataset_pool._add_dataset('land_cover', lc)
            lc.compute_variables(self.variable_name,
                                 resources=input_resources,
                                 dataset_pool=dataset_pool)

            #lc.write_dataset(attributes='*')

            lc_values = lc.get_attribute(self.variable_name)
            expected_values = expected_lc.get_attribute(self.variable_name)

            if sum_atol is None: sum_atol = 1e-8
            if element_atol is None: element_atol = 1e-8

            if (not ma.allclose(lc_values, expected_values,
                                atol=element_atol)):
                logger.log_status(
                    "comparision using element-atol=%f, sum-atol=%f" %
                    (element_atol, sum_atol))
                logger.log_status("      computed      expected")
                logger.log_status("sum: ", lc_values.sum(),
                                  expected_values.sum())
                logger.log_status("max: ", max(lc_values),
                                  max(expected_values))
                logger.log_status("min: ", min(lc_values),
                                  min(expected_values))

                c1 = 0
                c2 = 0
                for (i, j) in zip(lc_values, expected_values):
                    if i != 0:
                        c1 = c1 + 1
                    if j != 0:
                        c2 = c2 + 1

                logger.log_status("# non-zeros values: ", c1, c2)
                logger.log_status("max distance between 2 elements: %f" %
                                  self._max_distance_between_two_elements(
                                      lc_values, expected_values))
                logger.log_status(lc_values, expected_values)
                count = 0
                total = 0
                for (i, j) in zip(lc_values, expected_values):
                    if i != j:
                        count = count + 1
                    total = total + 1
                logger.log_status("# different elements = %d, over %d, with a %f percentage" \
                                % (count, total, count*1.0/total))

            #self.assert_(ma.allclose(lc_values.sum(), expected_values.sum(), atol=sum_atol))
            self.assert_(
                ma.allclose(lc_values, expected_values, atol=element_atol))
        finally:
            if os.path.exists(temp_dir):
                rmtree(temp_dir)