def run(self):
    """Export the most recently modified run from the Opus cache to MySQL.

    Picks the run directory with the newest modification time under the
    cache directory (parcel or gridcell, depending on self.isParcel), then
    for each year in [1981, 1980 + YearsToRun) exports that year's flt data
    into a database named 'ress_<year>'.
    """
    directoryname = 'data/vibe_gridcell/runs/'
    if self.isParcel is True:
        directoryname = 'data/vibe_parcel/'
    base_dir = os.path.join(os.environ['OPUS_HOME'], directoryname)
    time = -1
    latest = ""
    for filename in os.listdir(base_dir):
        print(filename)
        # Hoisted: the original computed getmtime up to three times per entry.
        mtime = os.path.getmtime(os.path.join(base_dir, filename))
        if time == -1 or mtime > time:
            time = mtime
            latest = filename
    # NOTE(review): database credentials are hard-coded here; they should be
    # moved into external configuration.
    config = DatabaseServerConfiguration(host_name='localhost',
                                         user_name='urbansim',
                                         password='******',
                                         protocol='mysql')
    db_server = DatabaseServer(config)
    for i in range(1981, 1980 + int(self.YearsToRun)):
        newdir = latest + '/' + str(i)
        flt_directory_in = os.path.join(base_dir, newdir)
        input_storage = flt_storage(storage_location=flt_directory_in)
        db = db_server.get_database('ress_' + str(i))
        output_storage = StorageFactory().get_storage('sql_storage',
                                                      storage_location=db)
        ExportStorage().export(in_storage=input_storage,
                               out_storage=output_storage)
def __init__(self, reference_location=None):
    """
    "reference_location" is the directory of the reference cache and should
    include the year. If it is None, the simulation directory in its start
    year is taken.
    """
    if reference_location is None:
        start_year = SimulationState().get_start_time()
        reference_location = os.path.join(
            SimulationState().get_cache_directory(), "%s" % start_year)
    self.reference_storage = flt_storage(reference_location)
def get_flt_storage_for_year(self, year):
    """Returns a flt_storage object for this year of this cache.

    Returns None when year is None. Created storages are memoized in
    self._flt_storage_per_year so each year's storage is built only once.
    """
    if year is None:
        return None
    # Membership test directly on the dict; the original built a key list
    # via .keys() on every call.
    if year not in self._flt_storage_per_year:
        base_directory = os.path.join(self.get_storage_location(), str(year))
        self._flt_storage_per_year[year] = flt_storage(storage_location=base_directory)
    return self._flt_storage_per_year[year]
def run(self, directory=None, check_size=True):
    """ "directory" is the cache to be compared to the reference. It should not include the year as the model checks all years. Set "check_sizes" to False if no size check of the datasets is required. """
    if directory is None:
        directory = SimulationState().get_cache_directory()
    self.cache = AttributeCache(directory)
    # Remember the simulation time so it could be restored; NOTE(review):
    # this method never restores year_orig — confirm whether that is intended.
    year_orig = SimulationState().get_current_time()
    years = self.years_in_cache()
    SimulationState().set_current_time(years[0])
    # Per-year flt storages; NOTE(review): 'storages' is built but not used
    # below — the comparisons go through self.cache / self.reference_storage.
    storages = {}
    for year in years:
        storages[year] = flt_storage(os.path.join(self.cache.get_storage_location(), '%s' % year))
    # One result row per table that differs from the reference.
    df = pd.DataFrame(columns=["Table", "Less-than-ref", "More-than-ref", "Year", "Size", "Size-ref"])
    tables = self.cache.get_table_names()
    for table in tables:
        # Compare the column sets of this cache vs. the reference cache.
        columns_list = self.cache.get_column_names(table)
        columns = Set(columns_list)
        ref_columns_list = self.reference_storage.get_column_names(table, lowercase=True)
        ref_columns = Set(ref_columns_list)
        more = columns.difference(ref_columns)   # columns only in this cache
        less = ref_columns.difference(columns)   # columns only in the reference
        samesize = True
        if check_size:
            # Use the first column of each table as a proxy for row count.
            table_size = self.cache.load_table(table, columns_list[0])[columns_list[0]].size
            reftable_size = self.reference_storage.load_table(table, ref_columns_list[0])[ref_columns_list[0]].size
            if table_size <> reftable_size:
                samesize = False
        if len(more) == 0 and len(less) == 0 and samesize:
            continue
        # Record the difference; Year/Size columns are filled in below.
        df.loc[df.shape[0]] = [table, ', '.join(less), ', '.join(more), '', 0, 0]
        if len(more) == 0 and samesize:
            continue
        # if there are columns in the "more" column, write out the corresponding years
        columns_and_years = self.cache._get_column_names_and_years(table)
        more_years = []
        for col, year in columns_and_years:
            if col in more:
                more_years.append(year)
        df.loc[df.shape[0]-1, "Year"] = ', '.join(np.unique(np.array(more_years).astype("str")))
        if not samesize:
            # there is difference in table sizes
            df.loc[df.shape[0]-1, "Size"] = table_size
            df.loc[df.shape[0]-1, "Size-ref"] = reftable_size
    if not check_size or (df['Size'].sum()==0
                          and df['Size-ref'].sum()==0):
        # remove the size columns if not used
        del df['Size']
        del df['Size-ref']
    if df.shape[0] > 0:
        logger.log_status("Differences in data structure relative to %s:" % self.reference_storage.get_storage_location())
        logger.log_status(df)
    else:
        logger.log_status("Data structure corresponds to the one in %s" % self.reference_storage.get_storage_location())
    return df
def get_dataset_pool(self, package_order):
    """Build a DatasetPool backed by opus_core's bundled 1980 test cache."""
    # These imports are here to prevent cycles in the imports.
    from opus_core.resources import Resources
    from opus_core.store.flt_storage import flt_storage
    cache_dir = os.path.join(package().get_opus_core_path(),
                             'data', 'test_cache', '1980')
    storage = flt_storage(Resources({'storage_location': cache_dir}))
    return DatasetPool(package_order, storage=storage)
def get_dataset_pool(self, package_order):
    """Return a DatasetPool reading from the opus_core test cache (year 1980)."""
    # Imported locally to avoid circular imports.
    from opus_core.resources import Resources
    from opus_core.store.flt_storage import flt_storage
    root = package().get_opus_core_path()
    location = os.path.join(root, 'data', 'test_cache', '1980')
    resources = Resources({'storage_location': location})
    return DatasetPool(package_order, storage=flt_storage(resources))
def load_table(self, table_name, column_names=Storage.ALL_COLUMNS, lowercase=True):
    """Load the requested columns of table_name, pulling each column from
    the year directory in which it is cached."""
    result = {}
    for column_name, year in self._get_column_names_and_years(table_name, lowercase=lowercase):
        # Skip columns the caller did not ask for (when a list was given).
        if isinstance(column_names, list) and column_name not in column_names:
            continue
        year_storage = flt_storage(os.path.join(self.get_storage_location(), '%s' % year))
        result.update(year_storage.load_table(table_name,
                                              column_names=[column_name],
                                              lowercase=lowercase))
    return result
def get_table_names(self):
    """Return the union of table names across all cached years, in the order
    first seen (years iterated newest-first).

    Years whose directory cannot be read are skipped (best-effort).
    """
    result = []
    seen = set()  # O(1) membership test; original scanned 'result' per table
    for year in self._get_sorted_list_of_years():
        try:
            storage = flt_storage(os.path.join(self.get_storage_location(), '%s' % year))
            for table in storage.get_table_names():
                if table not in seen:
                    seen.add(table)
                    result.append(table)
        except:
            # Deliberate best-effort: ignore unreadable year directories.
            pass
    return result
def get_flt_storage_for_year(self, year):
    """Returns a flt_storage object for this year of this cache.

    None is returned for a None year; storages are cached per year in
    self._flt_storage_per_year.
    """
    if year is None:
        return None
    # Test membership on the dict itself instead of materializing .keys().
    if year not in self._flt_storage_per_year:
        base_directory = os.path.join(self.get_storage_location(), str(year))
        self._flt_storage_per_year[year] = flt_storage(
            storage_location=base_directory)
    return self._flt_storage_per_year[year]
def get_table_names(self):
    """Collect every table name present in any cached year (newest year
    first), without duplicates; unreadable year directories are ignored.
    """
    result = []
    seen = set()  # replaces the O(n^2) 'table in result' list scan
    for year in self._get_sorted_list_of_years():
        try:
            storage = flt_storage(
                os.path.join(self.get_storage_location(), '%s' % year))
            for table in storage.get_table_names():
                if table not in seen:
                    seen.add(table)
                    result.append(table)
        except:
            # Best-effort enumeration: skip years that fail to open.
            pass
    return result
def test_flt_dataset(self):
    """A Dataset backed by flt_storage reads the sample 'endians' table."""
    import opus_core
    from opus_core.store.flt_storage import flt_storage
    attribute = 'little_endian'
    location = os.path.join(opus_core.__path__[0], 'data', 'flt')
    ds = Dataset(in_storage=flt_storage(storage_location=location),
                 id_name=attribute,
                 in_table_name='endians')
    self.assertAlmostEqual(11.0, ds.get_attribute_by_index(attribute, 0))
    self.assertEqual(None, ds.get_attribute_header(attribute))
def delete_computed_tables(self):
    """Delete all '.computed' tables from the current year's cache directory.

    Returns an array of the table names that were deleted (empty array when
    the year directory does not exist or nothing was deleted).
    """
    # Use this method only in conjunction with deleting computed attributes
    # of the datasets: dataset.delete_computed_attributes()
    year = SimulationState().get_current_time()
    storage_directory = os.path.join(self.get_storage_location(), '%s' % year)
    if not os.path.exists(storage_directory):
        return array([])
    storage = flt_storage(storage_directory)
    tables = [table for table in storage.get_table_names()
              if table.endswith('.computed')]
    # Comprehension instead of array(map(lambda ...)); same values, clearer.
    deleted = array([storage.delete_table(table) for table in tables])
    if deleted.size > 0:
        # Keep only the names whose delete_table call reported success.
        return array(tables)[deleted]
    return array(tables)
def _get_column_names_and_years(self, table_name, lowercase=True):
    """Return a list of (column_name, year) pairs for table_name, taking each
    column from the newest year in which it appears.

    Raises StandardError if the table is found in no cached year.
    """
    seen = set()  # O(1) dedup; original scanned a list per column
    result = []
    found = False
    for year in self._get_sorted_list_of_years():
        try:
            storage = flt_storage(os.path.join(self.get_storage_location(), '%s' % year))
            columns = [column for column in storage.get_column_names(table_name, lowercase)
                       if column not in seen]
            seen.update(columns)
            result.extend([(column_name, year) for column_name in columns])
            found = True
        except:
            # Best-effort: the table may simply be absent in this year.
            pass
    if not found:
        # Call form instead of the old 'raise E, msg' statement syntax.
        raise StandardError("Table %s not found" % table_name)
    return result
def _get_sorted_list_of_years(self):
    """Returns a sorted list (descending order) of the current and prior years
    having directories in the cache directory.
    """
    # (removed: unused 'from os import listdir')
    current_year = SimulationState().get_current_time()
    dirs = flt_storage(self.get_storage_location()).listdir_in_base_directory()
    years = []
    for dir_name in dirs:
        try:
            year = int(dir_name)
            if year <= current_year:
                years.append(year)
        except (ValueError, TypeError):
            # Non-numeric directory names are not year caches; skip them.
            pass
    return sorted(years, reverse=True)
def _get_sorted_list_of_years(self):
    """Returns a sorted list (descending order) of the current and prior years
    having directories in the cache directory.
    """
    # (removed: unused 'from os import listdir')
    current_year = SimulationState().get_current_time()
    dirs = flt_storage(
        self.get_storage_location()).listdir_in_base_directory()
    years = []
    for dir_name in dirs:
        try:
            year = int(dir_name)
            if year <= current_year:
                years.append(year)
        except (ValueError, TypeError):
            # Directory names that are not integers are ignored.
            pass
    return sorted(years, reverse=True)
def load_table(self, table_name, column_names=Storage.ALL_COLUMNS, lowercase=True):
    """Assemble table_name column by column, reading each column from the
    year directory where it lives."""
    wanted = column_names if isinstance(column_names, list) else None
    result = {}
    for name, year in self._get_column_names_and_years(table_name,
                                                       lowercase=lowercase):
        if wanted is not None and name not in wanted:
            continue
        path = os.path.join(self.get_storage_location(), '%s' % year)
        result.update(flt_storage(path).load_table(table_name,
                                                   column_names=[name],
                                                   lowercase=lowercase))
    return result
def _get_column_names_and_years(self, table_name, lowercase=True):
    """Collect (column_name, year) pairs for table_name across cached years,
    newest year first, keeping only the first year each column appears in.

    Raises StandardError when no year contains the table.
    """
    seen = set()  # replaces the O(n^2) 'column in column_names' list scan
    result = []
    found = False
    for year in self._get_sorted_list_of_years():
        try:
            storage = flt_storage(
                os.path.join(self.get_storage_location(), '%s' % year))
            columns = [
                column for column in storage.get_column_names(table_name, lowercase)
                if column not in seen
            ]
            seen.update(columns)
            result.extend([(column_name, year) for column_name in columns])
            found = True
        except:
            # Deliberately tolerant: a year without this table is not an error.
            pass
    if not found:
        # 'raise E(msg)' instead of the Python-2-only 'raise E, msg' form.
        raise StandardError("Table %s not found" % table_name)
    return result
def run(self):
    """Check that within each cached table all attribute columns have the
    same number of rows.

    For every inconsistent table, logs a DataFrame of the columns whose size
    differs from the table's most common column size. Returns a pandas
    Series (indexed by table name) with the count of inconsistent columns.
    """
    year_orig = SimulationState().get_current_time()
    years = self.years_in_cache()
    SimulationState().set_current_time(years[0])
    storages = {}
    for year in years:
        storages[year] = flt_storage(os.path.join(self.cache.get_storage_location(), '%s' % year))
    tables = self.cache.get_table_names()
    counts = pd.Series(np.zeros(len(tables), dtype="int32"), index=tables)
    for table in tables:
        columns = self.cache._get_column_names_and_years(table)
        values = []
        names = []
        colyears = []
        for col, year in columns:
            if col in names:
                continue
            data = storages[year].load_table(table, column_names=col)
            values.append(data[col].size)
            names.append(col)
            colyears.append(year)
        values = np.array(values)
        if all(values == values[0]):
            continue  # all attributes have the same size
        # there is an inconsistency in attributes length
        names = np.array(names)
        colyears = np.array(colyears)
        uc = np.unique(values, return_counts=True)
        imax = np.argmax(uc[1])  # index of the most common column size
        # '!=' instead of the removed-in-Python-3 '<>' operator
        idx = np.where(values != uc[0][imax])[0]
        df = pd.DataFrame({"column": names[idx], "year": colyears[idx], "size": values[idx]})
        df = df.append(pd.DataFrame({"column": np.array(["all other columns"]),
                                     "year": np.array([years[0]]),
                                     "size": np.array([uc[0][imax]])}))
        logger.log_status("Inconsistency in table ", table, ":\n", df)
        counts[table] = df.shape[0] - 1
    SimulationState().set_current_time(year_orig)
    logger.log_status("Model total:", counts.sum(), ' size inconsistencies found.')
    return counts
# Command-line driver: export an Opus attribute cache to CSV files.
parser = OptionParser()
parser.add_option('-c', '--cache_path', dest='cache_path', type='string',
                  help='The filesystem path to the cache to export (required)')
parser.add_option('-o', '--output_directory', dest='output_directory', type='string',
                  help='The filesystem path of the database to which '
                       'output will be written (required)')
parser.add_option('-t', '--table_name', dest='table_name', type='string',
                  help='Name of table to be exported (optional). Used if only one table should be exported.')
options, args = parser.parse_args()

cache_path = options.cache_path
output_directory = options.output_directory
table_name = options.table_name

# Both the cache path and the output directory are mandatory.
if cache_path is None or output_directory is None:
    parser.print_help()
    sys.exit(1)

in_storage = flt_storage(storage_location=cache_path)
out_storage = csv_storage(storage_location=output_directory)
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

if table_name is None:
    # No table given: export every table in the cache.
    ExportStorage().export(in_storage=in_storage, out_storage=out_storage)
else:
    ExportStorage().export_dataset(table_name,
                                   in_storage=in_storage,
                                   out_storage=out_storage)
def write_table(self, table_name, table_data, mode=Storage.OVERWRITE):
    """Write table_data into the flt storage of the current simulation year."""
    current_year = SimulationState().get_current_time()
    year_dir = os.path.join(self.get_storage_location(), '%s' % current_year)
    return flt_storage(year_dir).write_table(table_name, table_data, mode)
def write_table(self, table_name, table_data, mode=Storage.OVERWRITE):
    """Delegate the write to the flt storage for the current year's directory."""
    storage_for_year = flt_storage(
        os.path.join(self.get_storage_location(),
                     '%s' % SimulationState().get_current_time()))
    return storage_for_year.write_table(table_name, table_data, mode)
# Command-line driver: export an Opus attribute cache to tab-delimited files.
parser = OptionParser()
parser.add_option('-c', '--cache_path', dest='cache_path', type='string',
                  help='The filesystem path to the cache to export (required)')
parser.add_option('-o', '--output_directory', dest='output_directory', type='string',
                  help='The filesystem path of the database to which '
                       'output will be written (required)')
parser.add_option('-t', '--table_name', dest='table_name', type='string',
                  help='Name of table to be exported (optional). Used if only one table should be exported.')
options, args = parser.parse_args()

cache_path = options.cache_path
output_directory = options.output_directory
table_name = options.table_name

# Both required arguments must be present.
if cache_path is None or output_directory is None:
    parser.print_help()
    sys.exit(1)

in_storage = flt_storage(storage_location=cache_path)
out_storage = tab_storage(storage_location=output_directory)
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

if table_name is None:
    # Export the whole cache when no single table is requested.
    ExportStorage().export(in_storage=in_storage, out_storage=out_storage)
else:
    ExportStorage().export_dataset(table_name,
                                   in_storage=in_storage,
                                   out_storage=out_storage)