def write_dataset(self, dataset, attributes, out_storage):
    with block("Exporting %s dataset" % dataset.get_dataset_name()):
        if isinstance(out_storage, csv_storage):
            # Route csv exports through pandas so we control column
            # selection and header format ourselves.
            data = {}
            for attr in attributes:
                data[VariableName(attr).get_alias()] = dataset[attr]
            dataframe = DataFrame(data)
            output_loc = out_storage.get_storage_location()
            if not os.path.exists(output_loc):
                os.makedirs(output_loc)
            filename = dataset.out_table_name_default + '.csv'
            filepath = os.path.join(output_loc, filename)
            logger.log_status("export to %s" % filepath)
            # NOTE: `cols` is the pre-0.14 pandas keyword; on modern
            # pandas this argument is spelled `columns`.
            dataframe.to_csv(filepath, cols=data.keys(), index=False)
        else:
            dataset.write_dataset(attributes=attributes, out_storage=out_storage)
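# Minimal usage sketch (not part of the original module); the owner class,
# dataset, and attribute names below are hypothetical placeholders:
#
#   exporter = TravelModelExporter()                      # hypothetical owner class
#   out_storage = csv_storage(storage_location='/tmp/mtc_out')
#   exporter.write_dataset(dataset=household_dataset,
#                          attributes=['household.persons',
#                                      'hhsize=household.persons'],
#                          out_storage=out_storage)
#
# Any non-csv storage (e.g. an AttributeCache flt store) falls through to
# the stock Dataset.write_dataset() path in the else branch.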
def import_travel_model_data(config, year):
    cache_directory = config['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_current_time(year)
    simulation_state.set_cache_directory(cache_directory)
    out_store = AttributeCache().get_flt_storage_for_year(year + 1)
    out_store_loc = out_store.get_storage_location()
    tm_config = config['travel_model_configuration']
    data_to_import = tm_config['tm_to_urbansim_variable_mapping']
    base_dir = mtc_common.tm_get_base_dir(config)
    data_dir = tm_config[year]['data_dir']
    for dataset_name, skim_file in data_to_import.iteritems():
        skim_file = os.path.join(base_dir, data_dir, skim_file)
        data = read_csv(skim_file, header=0)
        with block("Caching {} to {}".format(dataset_name, out_store_loc)):
            logger.log_status("Source file {}".format(skim_file))
            opus_ds = to_opus_dataset(data, out_store, dataset_name)
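# Hedged sketch of the configuration this function expects. The keys come
# from the code above; the file names and dataset names are made-up
# examples. Note that travel-model outputs are cached into year + 1.
#
#   config['travel_model_configuration'] = {
#       'tm_to_urbansim_variable_mapping': {
#           'travel_data': 'skims/travel_data.csv',   # dataset_name -> csv path under data_dir
#       },
#       2010: {'data_dir': 'runs/2010'},              # per-year travel model output directory
#   }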
from pandas import DataFrame, merge

def opus_dataset_to_dataframe(opus_dataset):
    # Wrapper name reconstructed: the original `def` line was lost, but the
    # orphaned `return` implies a helper that converts an Opus dataset into
    # a pandas DataFrame, one column per known attribute.
    df = {}
    for attr in opus_dataset.get_known_attribute_names():
        df[attr] = opus_dataset[attr]
    df = DataFrame(df)
    return df

# Benchmark: the same building_type_id lookup done three ways.
units_df = {}
bldg_df = {}
for attr in units.get_known_attribute_names():
    units_df[attr] = units[attr]
for attr in bldg.get_known_attribute_names():
    bldg_df[attr] = bldg[attr]

with block('opus join'):
    results_opus = units.compute_variables(
        'residential_unit.disaggregate(building.building_type_id)',
        dataset_pool=dataset_pool)

units_df = DataFrame(units_df)
bldg_df = DataFrame(bldg_df)

with block('pandas join without index'):
    units_merged1 = merge(units_df, bldg_df[['building_id', 'building_type_id']],
                          on='building_id', sort=False, how='left')
    results_df1 = units_merged1['building_type_id']
    results_df1.fillna(value=-1, inplace=True)

bldg_df.set_index('building_id', inplace=True)

with block('pandas join with index'):
    units_merged2 = units_df.join(bldg_df['building_type_id'],
                                  on='building_id', how='left')
    results_df2 = units_merged2['building_type_id']
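# Hedged sanity check (not part of the original benchmark): the three join
# strategies should agree row-for-row, assuming every residential_unit
# matches a building (the -1 fill only matters for unmatched rows).
import numpy as np
assert np.array_equal(np.asarray(results_opus), results_df1.values)
assert np.array_equal(results_df1.values, results_df2.fillna(-1).values)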
def run(self, year=None, years_to_run=[], configuration=None):
    if year not in years_to_run or self.data_to_export is None:
        return
    cache_directory = configuration['cache_directory']
    simulation_state = SimulationState()
    simulation_state.set_cache_directory(cache_directory)
    simulation_state.set_current_time(year)
    attribute_cache = AttributeCache()
    package_order = configuration['dataset_pool_configuration'].package_order
    dataset_pool = SessionConfiguration(new_instance=True,
                                        package_order=package_order,
                                        in_storage=attribute_cache
                                        ).get_dataset_pool()

    out_dir = os.path.join(cache_directory, "mtc_data")
    out_storage = csv_storage(storage_location=out_dir)

    # Adjust the age distribution per ABAG/MTC's specifications
    age_control_dir = os.path.join(paths.OPUS_DATA_PATH,
                                   configuration['project_name'],
                                   "ageControl")
    age_control_storage = csv_storage(storage_location=age_control_dir)
    age_control_files = os.listdir(age_control_dir)
    # Pick the tazData file whose vintage is closest to the simulation year.
    years = np.array(map(lambda x: int(os.path.basename(x)
                                       .replace("tazData", "")
                                       .replace(".csv", "")),
                         glob.glob(os.path.join(age_control_dir, "tazData*.csv"))))
    closest_year = years[np.argmin(np.abs(years - year))]
    if closest_year != year:
        logger.log_warning("Could not find age control data for " + str(year) +
                           ". Choosing nearest year " + str(closest_year) + ".")
    age_control_table = age_control_storage.load_table("tazData" + str(closest_year),
                                                       lowercase=False)

    # Calculate the ABAG shares of persons by age
    age_categories = ['AGE0004', 'AGE0519', 'AGE2044', 'AGE4564', 'AGE65P']
    age_category_sums = dict((k, age_control_table[k].sum()) for k in age_categories)
    total = float(sum(age_category_sums.values()))  # float() guards against Python 2 integer division
    abag_age_category_shares = dict((k, age_category_sums[k] / total)
                                    for k in age_categories)

    for data_fname, variable_mapping in self.data_to_export.iteritems():
        if not flip_urbansim_to_tm_variable_mapping:
            col_names = variable_mapping.values()
            variables_aliases = ["=".join(mapping[::-1])
                                 for mapping in variable_mapping.iteritems()]
        else:
            col_names = variable_mapping.keys()
            variables_aliases = ["=".join(mapping)
                                 for mapping in variable_mapping.iteritems()]

        dataset_name = VariableName(variables_aliases[0]).get_dataset_name()
        dataset = dataset_pool.get_dataset(dataset_name)
        dataset.compute_variables(variables_aliases)

        if data_fname == "ABAGData":
            logger.log_status("Adjusting ABAGData to match age controls")
            age_category_sums = dict((k, dataset[k].sum()) for k in age_categories)
            total = float(sum(age_category_sums.values()))
            us_age_category_shares = dict((k, age_category_sums[k] / total)
                                          for k in age_categories)
            # Multiplicative correction per age bin: ABAG share over the
            # simulated (UrbanSim) share.
            adjustments = dict((k, abag_age_category_shares[k] / us_age_category_shares[k])
                               for k in age_categories)
            diff = np.zeros(dataset.n)
            for k in age_categories:
                before = dataset[k]
                dataset[k] = np.array(map(lambda v: round(v * adjustments[k]),
                                          dataset.get_attribute(k)))
                diff += (dataset[k] - before)
            # Rounding shifts a few persons per zone; fold the residual back
            # so the population totals stay consistent with the age bins.
            dataset["TOTPOP"] += diff
            dataset["HHPOP"] += diff
            logger.log_status("NOTE: Adjusted total population by %d (%2.3f%%) "
                              "due to rounding error."
                              % (int(diff.sum()), diff.sum() * 100 / total))

        org_fname = os.path.join(out_dir, "%s.computed.csv" % data_fname)
        new_fname = os.path.join(out_dir, "%s%s.csv" % (year, data_fname))
        block_msg = "Writing {} for travel model to {}".format(data_fname, new_fname)
        with block(block_msg):
            dataset.write_dataset(attributes=col_names,
                                  out_storage=out_storage,
                                  out_table_name=data_fname)
            # Rename, then strip Opus type suffixes (e.g. ":f8") from the
            # csv header so the travel model sees plain column names.
            shutil.move(org_fname, new_fname)
            os.system("sed 's/:[a-z][0-9]//g' -i %s" % new_fname)
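# Worked toy example (illustration only, made-up numbers) of the age-control
# adjustment in run() above: rescale each age bin by (ABAG share / UrbanSim
# share), round, and fold the rounding residual back into the totals.
#
#   abag_shares     = {'AGE0004': 0.06, 'AGE65P': 0.14}   # target shares
#   urbansim_shares = {'AGE0004': 0.05, 'AGE65P': 0.15}   # simulated shares
#   adjustments     = {'AGE0004': 1.2,  'AGE65P': 0.933}  # target / simulated
#
#   # A zone with AGE0004 = 10 becomes round(10 * 1.2) = 12, so diff = +2
#   # for that bin; summing diff across bins and adding it to TOTPOP and
#   # HHPOP keeps the totals consistent with the rounded bins.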