def __init__(self, regression_procedure="opus_core.linear_regression",
             filter_attribute=None, submodel_string="building_type_id",
             outcome_attribute_name=None, model_name=None,
             model_short_name=None, run_config=None, estimate_config=None,
             debuglevel=0, dataset_pool=None):
    """Store the filter and any name overrides, then initialize the base
    RegressionModel.
    """
    self.filter = filter_attribute
    # Only override the class-level defaults when explicit values were given.
    if model_name is not None:
        self.model_name = model_name
    if model_short_name is not None:
        self.model_short_name = model_short_name
    if outcome_attribute_name is not None:
        self.outcome_attribute_name = outcome_attribute_name
    RegressionModel.__init__(self,
                             regression_procedure=regression_procedure,
                             submodel_string=submodel_string,
                             run_config=run_config,
                             estimate_config=estimate_config,
                             debuglevel=debuglevel,
                             dataset_pool=dataset_pool)
def skip_test_bma(self):
    """Disabled smoke test: estimate a model through the R-based BMA
    procedure and write its image plot to a PDF file.
    """
    storage = StorageFactory().get_storage('dict_storage')
    table = {
        "id": arange(100) + 1,
        "attr1": concatenate((random.randint(0, 10, 50),
                              random.randint(20, 40, 50))),
        "attr2": random.ranf(100),
        "outcome": array(50 * [0] + 50 * [1]),
    }
    storage.write_table(table_name='dataset', table_data=table)
    ds = Dataset(in_storage=storage, in_table_name='dataset', id_name="id")
    specification = EquationSpecification(
        variables=array(["constant", "attr2", "attr1"]),
        coefficients=array(["constant", "ba2", "ba1"]))
    filename = 'bma_output.pdf'
    model = RegressionModel(
        estimate_config={'bma_imageplot_filename': filename})
    model.estimate(specification, ds, "outcome",
                   procedure="opus_core.bma_for_linear_regression_r")
def __init__(self, group_member, datasets_grouping_attribute, **kwargs):
    """
    'group_member' is of type ModelGroupMember. 'datasets_grouping_attribute'
    is an attribute of the dataset (passed to the 'run' and 'estimate'
    method) that is used for grouping.
    """
    self.group_member = group_member
    group_member_name = group_member.get_member_name()
    self.group_member.set_agents_grouping_attribute(datasets_grouping_attribute)
    # Prefix the member name onto the (class-level) model names.
    self.model_name = "%s %s" % (group_member_name.capitalize(),
                                 self.model_name)
    # BUG FIX: a trailing comma previously turned model_short_name into a
    # one-element tuple instead of a string.
    self.model_short_name = "%s %s" % (group_member_name.capitalize(),
                                       self.model_short_name)
    RegressionModel.__init__(self, **kwargs)
def __init__(self, group_member, datasets_grouping_attribute, **kwargs):
    """
    'group_member' is of type ModelGroupMember. 'datasets_grouping_attribute'
    is an attribute of the dataset (passed to the 'run' and 'estimate'
    method) that is used for grouping.
    """
    self.group_member = group_member
    group_member_name = group_member.get_member_name()
    self.group_member.set_agents_grouping_attribute(
        datasets_grouping_attribute)
    # Prefix the member name onto the (class-level) model names.
    self.model_name = "%s %s" % (group_member_name.capitalize(),
                                 self.model_name)
    # BUG FIX: a trailing comma previously turned model_short_name into a
    # one-element tuple instead of a string.
    self.model_short_name = "%s %s" % (group_member_name.capitalize(),
                                       self.model_short_name)
    RegressionModel.__init__(self, **kwargs)
def __init__(self, regression_procedure="opus_core.linear_regression",
             filter_attribute="urbansim.gridcell.has_residential_units",
             submodel_string="development_type_id",
             run_config=None, estimate_config=None, debuglevel=0):
    """Remember the filter expression, then initialize the base
    RegressionModel.
    """
    self.filter_attribute = filter_attribute
    RegressionModel.__init__(self,
                             regression_procedure=regression_procedure,
                             submodel_string=submodel_string,
                             run_config=run_config,
                             estimate_config=estimate_config,
                             debuglevel=debuglevel)
def __init__(self, regression_procedure="opus_core.linear_regression",
             submodel_string=None, outcome_attribute=None, run_config=None,
             estimate_config=None, debuglevel=None, dataset_pool=None):
    """'outcome_attribute' must be specified in order to compute the
    residuals.
    """
    RegressionModel.__init__(self,
                             regression_procedure=regression_procedure,
                             submodel_string=submodel_string,
                             run_config=run_config,
                             estimate_config=estimate_config,
                             debuglevel=debuglevel,
                             dataset_pool=dataset_pool)
    self.outcome_attribute = outcome_attribute
    # Normalize a plain string into a VariableName instance.
    if self.outcome_attribute is not None and \
            not isinstance(self.outcome_attribute, VariableName):
        self.outcome_attribute = VariableName(self.outcome_attribute)
def run(self, specification, coefficients, dataset, index=None, chunk_specification=None, data_objects=None, run_config=None, debuglevel=0):
    """Run the regression and write the outcome onto `dataset`.

    For info on the arguments see RegressionModel.
    dataset should be an instance of DevelopmentProjectProposalDataset,
    if it isn't, create dataset on the fly with parcel and development template
    index and self.filter_attribute (passed in __init__) are relative to dataset
    """
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # Build the proposal-component set from the proposals and the development
    # template components, and expose it through the dataset pool.
    proposal_component_set = create_from_proposals_and_template_components(dataset, self.dataset_pool.get_dataset('development_template_component'))
    self.dataset_pool.replace_dataset(proposal_component_set.get_dataset_name(), proposal_component_set)
    #proposal_component_set.flush_dataset_if_low_memory_mode()
    #dataset.flush_dataset_if_low_memory_mode()
    result = RegressionModel.run(self, specification, coefficients, dataset, index=index, chunk_specification=chunk_specification, data_objects=data_objects, run_config=run_config, debuglevel=debuglevel)
    if re.search("^ln_", self.outcome_attribute_name):
        # if the outcome attr. name starts with 'ln_' the results will be
        # exponentiated. NOTE(review): this strips the 'ln_' prefix from the
        # instance attribute itself, so the change persists across calls.
        self.outcome_attribute_name = self.outcome_attribute_name[3:len(self.outcome_attribute_name)]
        result = exp(result)
    if self.outcome_attribute_name not in dataset.get_known_attribute_names():
        # NOTE(review): 'defalult_value' (sic) must be set elsewhere with this
        # exact spelling; do not correct the name here in isolation.
        dataset.add_primary_attribute(self.defalult_value + zeros(dataset.size()), self.outcome_attribute_name)
    dataset.set_values_of_one_attribute(self.outcome_attribute_name, result, index=index)
    self.correct_infinite_values(dataset, self.outcome_attribute_name)
    return dataset
def get_configuration(self):
    """Describe the constructor arguments; the 'run' configuration is taken
    from RegressionModel.
    """
    init_config = {
        "regression_procedure": {"default": "opus_core.linear_regression",
                                 "type": str},
        "submodel_string": {"default": "development_type_id", "type": str},
        "run_config": {"default": None, "type": Resources},
        "estimate_config": {"default": None, "type": Resources},
        "debuglevel": {"default": 0, "type": int},
    }
    return {"init": init_config,
            "run": RegressionModel.get_configuration(self)["run"]}
def estimate(self, specification, dataset, outcome_attribute="housing_price",
             index=None, procedure="opus_core.estimate_linear_regression",
             data_objects=None, estimate_config=None, debuglevel=0):
    """Optionally restrict the estimation index by self.filter_attribute,
    then delegate to RegressionModel.estimate.
    """
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # Fix: replaced the Python-2-only '<>' operator with 'is not None'.
    if self.filter_attribute is not None:
        res = Resources({"debug": debuglevel})
        index = dataset.get_filtered_index(self.filter_attribute, threshold=0,
                                           index=index,
                                           dataset_pool=self.dataset_pool,
                                           resources=res)
    return RegressionModel.estimate(self, specification, dataset,
                                    outcome_attribute, index, procedure,
                                    estimate_config=estimate_config,
                                    debuglevel=debuglevel)
def run(self, specification, coefficients, dataset, index=None,
        chunk_specification=None, data_objects=None, run_config=None,
        debuglevel=0):
    """ For info on the arguments see RegressionModel.

    Applies a logistic transformation to the regression outcome and stores
    it on the dataset under self.attribute_to_modify.
    """
    regression_outcome = RegressionModel.run(
        self, specification, coefficients, dataset, index=index,
        chunk_specification=chunk_specification, data_objects=data_objects,
        run_config=run_config, debuglevel=debuglevel)
    # Fix: identity comparison with None instead of '=='.
    if (regression_outcome is None) or (regression_outcome.size <= 0):
        return regression_outcome
    if index is None:
        index = arange(dataset.size())
    # Logistic transform: maps the linear outcome into (0, 1).
    result = exp(regression_outcome)
    result = result / (1.0 + result)
    if self.attribute_to_modify not in dataset.get_known_attribute_names():
        dataset.add_attribute(name=self.attribute_to_modify,
                              data=zeros((dataset.size(),), dtype=float32))
    dataset.set_values_of_one_attribute(self.attribute_to_modify, result,
                                        index)
    return result
def __init__(self, regression_procedure="opus_core.linear_regression",
             filter_attribute=None, submodel_string=None,
             outcome_attribute=None, run_config=None, estimate_config=None,
             debuglevel=0, dataset_pool=None):
    """Store the filter and (optionally) the outcome attribute, then
    initialize the base RegressionModel.
    """
    self.filter_attribute = filter_attribute
    # Keep the class-level default unless an outcome attribute was given.
    if outcome_attribute is not None:
        self.outcome_attribute = outcome_attribute
    RegressionModel.__init__(self,
                             regression_procedure=regression_procedure,
                             submodel_string=submodel_string,
                             run_config=run_config,
                             estimate_config=estimate_config,
                             debuglevel=debuglevel,
                             dataset_pool=dataset_pool)
def prepare_for_run(self, add_member_prefix=True, specification_storage=None,
                    specification_table=None, coefficients_storage=None,
                    coefficients_table=None, **kwargs):
    """If requested, prefix the group-member name onto both table names
    before delegating to RegressionModel.prepare_for_run.
    """
    if add_member_prefix:
        tables = [specification_table, coefficients_table]
        specification_table, coefficients_table = \
            self.group_member.add_member_prefix_to_table_names(tables)
    return RegressionModel.prepare_for_run(
        self,
        specification_storage=specification_storage,
        specification_table=specification_table,
        coefficients_storage=coefficients_storage,
        coefficients_table=coefficients_table,
        **kwargs)
def estimate(self, specification, dataset, outcome_attribute, index=None,
             **kwargs):
    """Estimate on the subset of agents belonging to this group member."""
    if index is None:
        index = arange(dataset.size())
    data_objects = kwargs.get("data_objects", {})
    # Fix: the default is {}, so the former 'is not None' test was always
    # true; only touch the pool when datasets were actually supplied.
    if data_objects:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # filter out agents for this group
    new_index = self.group_member.get_index_of_my_agents(
        dataset, index, dataset_pool=self.dataset_pool)
    return RegressionModel.estimate(self, specification, dataset,
                                    outcome_attribute,
                                    index=index[new_index], **kwargs)
def __init__(self, regression_procedure="opus_core.linear_regression",
             filter="urbansim.gridcell.is_in_development_type_group_developable",
             submodel_string="development_type_id",
             run_config=None, estimate_config=None, debuglevel=0,
             dataset_pool=None):
    """Store the filter (falling back to a 'filter' entry in run_config or
    estimate_config when None), then initialize the base RegressionModel.
    """
    self.filter = filter
    if filter is None:
        # Fall back to configuration entries; run_config takes precedence.
        if run_config is not None and 'filter' in run_config:
            self.filter = run_config["filter"]
        elif estimate_config is not None and 'filter' in estimate_config:
            self.filter = estimate_config["filter"]
    RegressionModel.__init__(self,
                             regression_procedure=regression_procedure,
                             submodel_string=submodel_string,
                             run_config=run_config,
                             estimate_config=estimate_config,
                             debuglevel=debuglevel,
                             dataset_pool=dataset_pool)
def estimate(self, specification, dataset,
             outcome_attribute="urbansim.gridcell.ln_total_land_value",
             index=None, procedure="opus_core.estimate_linear_regression",
             data_objects=None, estimate_config=None, debuglevel=0):
    """Optionally restrict the estimation index by self.filter, then
    delegate to RegressionModel.estimate.
    """
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # Fix: replaced the Python-2-only '<>' operator with 'is not None'.
    if self.filter is not None:
        res = Resources({"debug": debuglevel})
        index = dataset.get_filtered_index(self.filter, threshold=0,
                                           index=index,
                                           dataset_pool=self.dataset_pool,
                                           resources=res)
    return RegressionModel.estimate(self, specification, dataset,
                                    outcome_attribute, index, procedure,
                                    estimate_config=estimate_config,
                                    debuglevel=debuglevel)
def __init__(self, regression_procedure="opus_core.linear_regression",
             outcome_attribute="month_combination_2", filter_attribute=None,
             submodel_string="land_use_type_id", run_config=None,
             estimate_config=None, debuglevel=0, dataset_pool=None):
    """Store the outcome attribute (normalized to VariableName) and the
    filter, then initialize the base RegressionModel.
    """
    self.outcome_attribute = outcome_attribute
    # Normalize a plain string into a VariableName instance.
    if self.outcome_attribute is not None and \
            not isinstance(self.outcome_attribute, VariableName):
        self.outcome_attribute = VariableName(self.outcome_attribute)
    self.filter_attribute = filter_attribute
    RegressionModel.__init__(self,
                             regression_procedure=regression_procedure,
                             submodel_string=submodel_string,
                             run_config=run_config,
                             estimate_config=estimate_config,
                             debuglevel=debuglevel,
                             dataset_pool=dataset_pool)
def get_configuration(self):
    """Describe the constructor arguments; the 'run' configuration is taken
    from RegressionModel.
    """
    init_config = {
        "regression_procedure": {"default": "opus_core.linear_regression",
                                 "type": str},
        "submodel_string": {"default": "development_type_id", "type": str},
        "run_config": {"default": None, "type": Resources},
        "estimate_config": {"default": None, "type": Resources},
        "debuglevel": {"default": 0, "type": int},
    }
    return {"init": init_config,
            "run": RegressionModel.get_configuration(self)["run"]}
def run(self, specification, coefficients, dataset, index=None, chunk_specification=None, data_objects=None, run_config=None, debuglevel=0):
    """Run the regression and write the outcome onto `dataset`.

    For info on the arguments see RegressionModel.
    dataset should be an instance of DevelopmentProjectProposalDataset,
    if it isn't, create dataset on the fly with parcel and development template
    index and self.filter_attribute (passed in __init__) are relative to dataset
    """
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # Build the proposal-component set from the proposals and the development
    # template components, and expose it through the dataset pool.
    proposal_component_set = create_from_proposals_and_template_components(
        dataset,
        self.dataset_pool.get_dataset('development_template_component'))
    self.dataset_pool.replace_dataset(
        proposal_component_set.get_dataset_name(), proposal_component_set)
    #proposal_component_set.flush_dataset_if_low_memory_mode()
    #dataset.flush_dataset_if_low_memory_mode()
    result = RegressionModel.run(self, specification, coefficients, dataset,
                                 index=index,
                                 chunk_specification=chunk_specification,
                                 data_objects=data_objects,
                                 run_config=run_config, debuglevel=debuglevel)
    if re.search("^ln_", self.outcome_attribute_name):
        # if the outcome attr. name starts with 'ln_' the results will be
        # exponentiated. NOTE(review): this strips the 'ln_' prefix from the
        # instance attribute itself, so the change persists across calls.
        self.outcome_attribute_name = self.outcome_attribute_name[
            3:len(self.outcome_attribute_name)]
        result = exp(result)
    if self.outcome_attribute_name not in dataset.get_known_attribute_names():
        # NOTE(review): 'defalult_value' (sic) must be set elsewhere with this
        # exact spelling; do not correct the name here in isolation.
        dataset.add_primary_attribute(
            self.defalult_value + zeros(dataset.size()),
            self.outcome_attribute_name)
    dataset.set_values_of_one_attribute(self.outcome_attribute_name, result,
                                        index=index)
    self.correct_infinite_values(dataset, self.outcome_attribute_name)
    return dataset
def __init__(self, regression_procedure="opus_core.linear_regression",
             submodel_string=None, outcome_attribute=None, run_config=None,
             estimate_config=None, debuglevel=None, dataset_pool=None):
    """'outcome_attribute' must be specified in order to compute the
    residuals.
    """
    RegressionModel.__init__(self,
                             regression_procedure=regression_procedure,
                             submodel_string=submodel_string,
                             run_config=run_config,
                             estimate_config=estimate_config,
                             debuglevel=debuglevel,
                             dataset_pool=dataset_pool)
    self.outcome_attribute = outcome_attribute
    # Normalize a plain string into a VariableName instance.
    if self.outcome_attribute is not None and \
            not isinstance(self.outcome_attribute, VariableName):
        self.outcome_attribute = VariableName(self.outcome_attribute)
def skip_test_bma(self):
    """Disabled smoke test: estimate a model through the R-based BMA
    procedure and write its image plot to a PDF file.
    """
    storage = StorageFactory().get_storage('dict_storage')
    table = {
        "id": arange(100) + 1,
        "attr1": concatenate((random.randint(0, 10, 50),
                              random.randint(20, 40, 50))),
        "attr2": random.ranf(100),
        "outcome": array(50 * [0] + 50 * [1]),
    }
    storage.write_table(table_name='dataset', table_data=table)
    ds = Dataset(in_storage=storage, in_table_name='dataset', id_name="id")
    specification = EquationSpecification(
        variables=array(["constant", "attr2", "attr1"]),
        coefficients=array(["constant", "ba2", "ba1"]))
    filename = 'bma_output.pdf'
    model = RegressionModel(
        estimate_config={'bma_imageplot_filename': filename})
    model.estimate(specification, ds, "outcome",
                   procedure="opus_core.bma_for_linear_regression_r")
def __init__(self, regression_procedure="opus_core.linear_regression",
             outcome_attribute="month_combination_2", filter_attribute=None,
             submodel_string="land_use_type_id", run_config=None,
             estimate_config=None, debuglevel=0, dataset_pool=None):
    """Store the outcome attribute (normalized to VariableName) and the
    filter, then initialize the base RegressionModel.
    """
    self.outcome_attribute = outcome_attribute
    # Normalize a plain string into a VariableName instance.
    if self.outcome_attribute is not None and \
            not isinstance(self.outcome_attribute, VariableName):
        self.outcome_attribute = VariableName(self.outcome_attribute)
    self.filter_attribute = filter_attribute
    RegressionModel.__init__(self,
                             regression_procedure=regression_procedure,
                             submodel_string=submodel_string,
                             run_config=run_config,
                             estimate_config=estimate_config,
                             debuglevel=debuglevel,
                             dataset_pool=dataset_pool)
def run(self, specification, coefficients, dataset, index=None, **kwargs):
    """Run the regression for this group member's agents only.

    Positions of the returned array that do not belong to the group are
    left at zero.
    """
    if index is None:
        index = arange(dataset.size())
    data_objects = kwargs.get("data_objects", {})
    # Fix: the default is {}, so the former 'is not None' test was always
    # true; only touch the pool when datasets were actually supplied.
    if data_objects:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # filter out agents for this group
    new_index = self.group_member.get_index_of_my_agents(
        dataset, index, dataset_pool=self.dataset_pool)
    regresult = RegressionModel.run(self, specification, coefficients,
                                    dataset, index=index[new_index], **kwargs)
    result = zeros(index.size, dtype=float32)
    result[new_index] = regresult
    return result
def prepare_for_estimate(self, add_member_prefix=True, specification_dict=None,
                         specification_storage=None, specification_table=None,
                         **kwargs):
    """Optionally prefix the group-member name onto the specification table
    name, then delegate to RegressionModel.prepare_for_estimate.
    """
    if add_member_prefix:
        # Fix: the helper returns a list; unpack its single element instead
        # of passing the list itself on as the table name (cf.
        # prepare_for_run, which unpacks the returned list).
        specification_table, = \
            self.group_member.add_member_prefix_to_table_names(
                [specification_table])
    # Fix: 'self' was missing from the unbound base-class call.
    return RegressionModel.prepare_for_estimate(
        self,
        specification_dict=specification_dict,
        specification_storage=specification_storage,
        specification_table=specification_table, **kwargs)
def run(self, specification, coefficients, dataset, index=None,
        chunk_specification=None, data_objects=None, run_config=None,
        debuglevel=0):
    """ For info on the arguments see RegressionModel.

    Stamps the dataset with a YYYYMM 'date' attribute (matching the id_name
    field of the weather dataset), runs the regression, and writes the
    (possibly exponentiated) water demand onto the dataset.
    """
    outcome_attribute_short = self.outcome_attribute.get_alias()
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # Fix: replaced the Python-2-only '<>' operator with 'is not None'.
    if self.filter_attribute is not None:
        res = Resources({"debug": debuglevel})
        index = dataset.get_filtered_index(self.filter_attribute, threshold=0,
                                           index=index,
                                           dataset_pool=self.dataset_pool,
                                           resources=res)
    current_year = SimulationState().get_current_time()
    # The trailing digits of the outcome alias encode the month.
    current_month = int(re.search(r'\d+$', outcome_attribute_short).group())
    # date in YYYYMM format, matching the id_name field of the weather dataset
    date = int("%d%02d" % (current_year, current_month))
    date = array([date] * dataset.size())
    if "date" in dataset.get_known_attribute_names():
        dataset.set_values_of_one_attribute("date", date)
    else:
        dataset.add_primary_attribute(date, "date")
    water_demand = RegressionModel.run(self, specification, coefficients,
                                       dataset, index, chunk_specification,
                                       run_config=run_config,
                                       debuglevel=debuglevel)
    # Fix: identity comparison with None instead of '=='.
    if (water_demand is None) or (water_demand.size <= 0):
        return water_demand
    if index is None:
        index = arange(dataset.size())
    if re.search("^ln_", outcome_attribute_short):
        # an 'ln_' prefix means the model was fit in log space; exponentiate
        outcome_attribute_name = outcome_attribute_short[3:]
        water_demand = exp(water_demand)
    else:
        outcome_attribute_name = outcome_attribute_short
    if outcome_attribute_name in dataset.get_known_attribute_names():
        dataset.set_values_of_one_attribute(outcome_attribute_name,
                                            water_demand, index)
    else:
        results = zeros(dataset.size(), dtype=water_demand.dtype)
        results[index] = water_demand
        dataset.add_primary_attribute(results, outcome_attribute_name)
    return water_demand
def run(self, specification, coefficients, dataset, index=None, **kwargs):
    """ See description above.

    If missing values of the outcome attribute are supposed to be excluded
    from the addition of the initial residuals, set an entry of run_config
    'exclude_missing_values_from_initial_error' to True. Additionally, an
    entry 'outcome_attribute_missing_value' specifies the missing value
    (default is 0). Similarly, if outliers are to be excluded, the
    run_config entry "exclude_outliers_from_initial_error" should be set to
    True. In such a case, run_config entries 'outlier_is_less_than' and
    'outlier_is_greater_than' can define lower and upper bounds for
    outliers. By default, an outlier is a data point smaller than 0. There
    is no default upper bound.
    """
    if self.outcome_attribute is None:
        raise StandardError("An outcome attribute must be specified for this model. Pass it into the initialization.")
    if self.outcome_attribute.get_alias() not in dataset.get_known_attribute_names():
        try:
            dataset.compute_variables(self.outcome_attribute,
                                      dataset_pool=self.dataset_pool)
        # Fix: narrowed the bare 'except:' (which also swallowed
        # KeyboardInterrupt/SystemExit) to 'except Exception:'.
        except Exception:
            # Fix: the raise statement was broken across a line boundary (a
            # syntax error); re-raised as a single statement.
            raise StandardError("The outcome attribute %s must be a known attribute of the dataset %s." % (
                self.outcome_attribute.get_alias(),
                dataset.get_dataset_name()))
    if index is None:
        index = arange(dataset.size())
    original_data = dataset.get_attribute_by_index(self.outcome_attribute,
                                                   index)
    outcome = RegressionModel.run(
        self, specification, coefficients, dataset, index,
        initial_values=original_data.astype('float32'), **kwargs)
    initial_error_name = "_init_error_%s" % self.outcome_attribute.get_alias()
    if initial_error_name not in dataset.get_known_attribute_names():
        # First run: remember the residuals so later runs can re-apply them.
        initial_error = original_data - outcome
        dataset.add_primary_attribute(name=initial_error_name,
                                      data=zeros(dataset.size(),
                                                 dtype="float32"))
        exclude_missing_values = self.run_config.get(
            "exclude_missing_values_from_initial_error", False)
        exclude_outliers = self.run_config.get(
            "exclude_outliers_from_initial_error", False)
        if exclude_missing_values:
            missing_value = self.run_config.get(
                "outcome_attribute_missing_value", 0)
            initial_error[original_data == missing_value] = 0
            logger.log_status('Values equal %s were excluded from adding residuals.' % missing_value)
        if exclude_outliers:
            outlier_low = self.run_config.get("outlier_is_less_than", 0)
            initial_error[original_data < outlier_low] = 0
            outlier_high = self.run_config.get("outlier_is_greater_than",
                                               original_data.max())
            initial_error[original_data > outlier_high] = 0
            logger.log_status('Values less than %s and larger than %s were excluded from adding residuals.' % (outlier_low, outlier_high))
        dataset.set_values_of_one_attribute(initial_error_name, initial_error,
                                            index)
    else:
        initial_error = dataset.get_attribute_by_index(initial_error_name,
                                                       index)
    return outcome + initial_error
def run(self, specification, coefficients, dataset, index=None,
        chunk_specification=None, data_objects=None, run_config=None,
        debuglevel=0):
    """ For info on the arguments see RegressionModel.

    Applies a logistic transformation to the regression outcome and stores
    it on the dataset under self.attribute_to_modify.
    """
    regression_outcome = RegressionModel.run(
        self, specification, coefficients, dataset, index=index,
        chunk_specification=chunk_specification, data_objects=data_objects,
        run_config=run_config, debuglevel=debuglevel)
    # Fix: identity comparison with None instead of '=='.
    if (regression_outcome is None) or (regression_outcome.size <= 0):
        return regression_outcome
    if index is None:
        index = arange(dataset.size())
    # Logistic transform: maps the linear outcome into (0, 1).
    result = exp(regression_outcome)
    result = result / (1.0 + result)
    if self.attribute_to_modify not in dataset.get_known_attribute_names():
        dataset.add_attribute(name=self.attribute_to_modify,
                              data=zeros((dataset.size(),), dtype=float32))
    dataset.set_values_of_one_attribute(self.attribute_to_modify, result,
                                        index)
    return result
def run(self, specification, coefficients, dataset, index=None, **kwargs):
    """Run the regression for this group member's agents only.

    Positions of the returned array that do not belong to the group are
    left at zero.
    """
    if index is None:
        index = arange(dataset.size())
    data_objects = kwargs.get("data_objects", {})
    # Fix: the default is {}, so the former 'is not None' test was always
    # true; only touch the pool when datasets were actually supplied.
    if data_objects:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # filter out agents for this group
    new_index = self.group_member.get_index_of_my_agents(
        dataset, index, dataset_pool=self.dataset_pool)
    regresult = RegressionModel.run(self, specification, coefficients,
                                    dataset, index=index[new_index], **kwargs)
    result = zeros(index.size, dtype=float32)
    result[new_index] = regresult
    return result
def prepare_for_run(self, add_member_prefix=True, specification_storage=None,
                    specification_table=None, coefficients_storage=None,
                    coefficients_table=None, **kwargs):
    """If requested, prefix the group-member name onto both table names
    before delegating to RegressionModel.prepare_for_run.
    """
    if add_member_prefix:
        prefixed = self.group_member.add_member_prefix_to_table_names(
            [specification_table, coefficients_table])
        specification_table, coefficients_table = prefixed
    return RegressionModel.prepare_for_run(
        self,
        specification_storage=specification_storage,
        specification_table=specification_table,
        coefficients_storage=coefficients_storage,
        coefficients_table=coefficients_table,
        **kwargs)
def estimate(self, specification, dataset,
             outcome_attribute="urbansim.gridcell.logistic_fraction_residential_land",
             index=None, procedure="opus_core.estimate_linear_regression",
             data_objects=None, estimate_config=None, debuglevel=0):
    """Pass-through to RegressionModel.estimate with this model's default
    outcome attribute and estimation procedure.
    """
    return RegressionModel.estimate(self, specification, dataset,
                                    outcome_attribute, index, procedure,
                                    data_objects=data_objects,
                                    estimate_config=estimate_config,
                                    debuglevel=debuglevel)
def run(self, specification, coefficients, dataset, index=None,
        chunk_specification=None, data_objects=None, run_config=None,
        debuglevel=0):
    """ For info on the arguments see RegressionModel.

    Writes the predicted housing price onto the dataset; returns None on
    success.
    """
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # Fix: replaced the Python-2-only '<>' operator with 'is not None'.
    if self.filter_attribute is not None:
        res = Resources({"debug": debuglevel})
        index = dataset.get_filtered_index(self.filter_attribute, threshold=0,
                                           index=index,
                                           dataset_pool=self.dataset_pool,
                                           resources=res)
    housing_price = RegressionModel.run(self, specification, coefficients,
                                        dataset, index, chunk_specification,
                                        run_config=run_config,
                                        debuglevel=debuglevel)
    # Fix: identity comparison with None instead of '=='.
    if (housing_price is None) or (housing_price.size <= 0):
        return housing_price
    if index is None:
        index = arange(dataset.size())
    dataset.set_values_of_one_attribute("housing_price", housing_price, index)
    return
def estimate(self, specification, dataset, outcome_attribute, index=None,
             **kwargs):
    """Estimate on the subset of agents belonging to this group member."""
    if index is None:
        index = arange(dataset.size())
    data_objects = kwargs.get("data_objects", {})
    # Fix: the default is {}, so the former 'is not None' test was always
    # true; only touch the pool when datasets were actually supplied.
    if data_objects:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # filter out agents for this group
    new_index = self.group_member.get_index_of_my_agents(
        dataset, index, dataset_pool=self.dataset_pool)
    return RegressionModel.estimate(self, specification, dataset,
                                    outcome_attribute,
                                    index=index[new_index], **kwargs)
def run(self, specification, coefficients, dataset, index=None,
        chunk_specification=None, data_objects=None, run_config=None,
        debuglevel=0):
    """ For info on the arguments see RegressionModel.

    Predicts income for zero-worker households and clamps negative
    predictions to zero.
    """
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # Fix: replaced the Python-2-only '<>' operator with 'is not None'.
    if self.filter_attribute is not None:
        res = Resources({"debug": debuglevel})
        index = dataset.get_filtered_index(self.filter_attribute, threshold=0,
                                           index=index,
                                           dataset_pool=self.dataset_pool,
                                           resources=res)
    # NOTE(review): the filtered 'index' above is not used further; the
    # regression runs over zero-worker households only, as before.
    zeroworkers = dataset.compute_variables('household.workers == 0')
    index_zeroworker = where(zeroworkers)[0]
    # Run regression model
    incomes = RegressionModel.run(self, specification, coefficients, dataset,
                                  index_zeroworker, chunk_specification,
                                  run_config=run_config, debuglevel=debuglevel)
    dataset.set_values_of_one_attribute("income", incomes, index_zeroworker)
    # Bump up all negative incomes to zero
    negative_income = dataset.compute_variables('household.income < 0')
    index_neg_inc = where(negative_income == 1)[0]
    if index_neg_inc.size > 0:
        dataset.modify_attribute('income',
                                 zeros(index_neg_inc.size, dtype="int32"),
                                 index_neg_inc)
    return
def run(self, specification, coefficients, dataset, index=None,
        chunk_specification=None, data_objects=None, run_config=None,
        debuglevel=0):
    """ For info on the arguments see RegressionModel.

    Predicts ln(total land value), splits the exponentiated value into
    residential and non-residential parts using the dataset's
    'fraction_residential_land' attribute, stores both, and returns the
    index that was used.
    """
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # Fix: replaced the Python-2-only '<>' operator with 'is not None'.
    if self.filter is not None:
        res = Resources({"debug": debuglevel})
        index = dataset.get_filtered_index(self.filter, threshold=0,
                                           index=index,
                                           dataset_pool=self.dataset_pool,
                                           resources=res)
    ln_total_land_value = RegressionModel.run(self, specification,
                                              coefficients, dataset, index,
                                              chunk_specification,
                                              run_config=run_config,
                                              debuglevel=debuglevel)
    # Fix: identity comparison with None instead of '=='.
    if (ln_total_land_value is None) or (ln_total_land_value.size <= 0):
        return ln_total_land_value
    if index is None:
        index = arange(dataset.size())
    total_land_value = exp(ln_total_land_value)
    residential_land_value = total_land_value * dataset.get_attribute_by_index(
        "fraction_residential_land", index)
    nonresidential_land_value = total_land_value - residential_land_value
    dataset.set_values_of_one_attribute("residential_land_value",
                                        residential_land_value, index)
    dataset.set_values_of_one_attribute("nonresidential_land_value",
                                        nonresidential_land_value, index)
    self.post_check(dataset)
    return index
def run(self, specification, coefficients, dataset, index=None,
        chunk_specification=None, data_objects=None, run_config=None,
        debuglevel=0):
    """ For info on the arguments see RegressionModel.

    Writes the predicted housing price onto the dataset; returns None on
    success.
    """
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # Fix: replaced the Python-2-only '<>' operator with 'is not None'.
    if self.filter_attribute is not None:
        res = Resources({"debug": debuglevel})
        index = dataset.get_filtered_index(self.filter_attribute, threshold=0,
                                           index=index,
                                           dataset_pool=self.dataset_pool,
                                           resources=res)
    housing_price = RegressionModel.run(self, specification, coefficients,
                                        dataset, index, chunk_specification,
                                        run_config=run_config,
                                        debuglevel=debuglevel)
    # Fix: identity comparison with None instead of '=='.
    if (housing_price is None) or (housing_price.size <= 0):
        return housing_price
    if index is None:
        index = arange(dataset.size())
    dataset.set_values_of_one_attribute("housing_price", housing_price, index)
    return
#"residential_units", # "avg_income", #"commercial_sqft", #"t_max", # "demand_lag1" # "sum_demand_times_2", "waterdemand.consumption_re.something_like_sum_demand", ) ) print "Create a model object" years = range(2001, 2003) # single model = RegressionModel() print "Estimate coefficients - single" coefficients, other_est_results = model.estimate(specification, consumption, outcome_attribute="waterdemand.%s.sum_demand" % consumption_type, # if outcome_attribute is opus_core.func.ln(), the simulation results need to take exp() index=index_est, procedure="opus_core.estimate_linear_regression", data_objects=dataset_pool.datasets_in_pool()) """Simulate over the set of years.""" for year in years: print "\nSimulate water demand %s" % year SimulationState().set_current_time(year) dataset_pool = SessionConfiguration().get_dataset_pool() dataset_pool.remove_all_datasets() gridcells = dataset_pool.get_dataset("gridcell")
def prepare_for_run(self, *args, **kwargs):
    """Delegate to RegressionModel.prepare_for_run but return only the
    (specification, coefficients) pair, discarding the third element.
    """
    spec, coef, _unused = RegressionModel.prepare_for_run(self, *args,
                                                          **kwargs)
    return (spec, coef)
def run(self, specification, coefficients, dataset, index=None,
        chunk_specification=None, data_objects=None, run_config=None,
        debuglevel=0):
    """ For info on the arguments see RegressionModel.

    Stamps the dataset with a YYYYMM 'date' attribute (matching the id_name
    field of the weather dataset), runs the regression, and writes the
    (possibly exponentiated) water demand onto the dataset.
    """
    outcome_attribute_short = self.outcome_attribute.get_alias()
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # Fix: replaced the Python-2-only '<>' operator with 'is not None'.
    if self.filter_attribute is not None:
        res = Resources({"debug": debuglevel})
        index = dataset.get_filtered_index(self.filter_attribute, threshold=0,
                                           index=index,
                                           dataset_pool=self.dataset_pool,
                                           resources=res)
    current_year = SimulationState().get_current_time()
    # The trailing digits of the outcome alias encode the month.
    current_month = int(re.search(r'\d+$', outcome_attribute_short).group())
    # date in YYYYMM format, matching the id_name field of the weather dataset
    date = int("%d%02d" % (current_year, current_month))
    date = array([date] * dataset.size())
    if "date" in dataset.get_known_attribute_names():
        dataset.set_values_of_one_attribute("date", date)
    else:
        dataset.add_primary_attribute(date, "date")
    water_demand = RegressionModel.run(self, specification, coefficients,
                                       dataset, index, chunk_specification,
                                       run_config=run_config,
                                       debuglevel=debuglevel)
    # Fix: identity comparison with None instead of '=='.
    if (water_demand is None) or (water_demand.size <= 0):
        return water_demand
    if index is None:
        index = arange(dataset.size())
    if re.search("^ln_", outcome_attribute_short):
        # an 'ln_' prefix means the model was fit in log space; exponentiate
        outcome_attribute_name = outcome_attribute_short[3:]
        water_demand = exp(water_demand)
    else:
        outcome_attribute_name = outcome_attribute_short
    if outcome_attribute_name in dataset.get_known_attribute_names():
        dataset.set_values_of_one_attribute(outcome_attribute_name,
                                            water_demand, index)
    else:
        results = zeros(dataset.size(), dtype=water_demand.dtype)
        results[index] = water_demand
        dataset.add_primary_attribute(results, outcome_attribute_name)
    return water_demand
def run_after_estimation(self, *args, **kwargs):
    """Thin wrapper: forward all arguments to RegressionModel.run and return
    its result unchanged.
    """
    return RegressionModel.run(self, *args, **kwargs)
def run(self, specification, coefficients, dataset, index=None, **kwargs):
    """See description above.

    If missing values of the outcome attribute are supposed to be excluded
    from the addition of the initial residuals, set an entry of run_config
    'exclude_missing_values_from_initial_error' to True. Additionally, an
    entry 'outcome_attribute_missing_value' specifies the missing value
    (default is 0). Similarly, if outliers are to be excluded, the run_config
    entry 'exclude_outliers_from_initial_error' should be set to True. In such
    a case, run_config entries 'outlier_is_less_than' and
    'outlier_is_greater_than' can define lower and upper bounds for outliers.
    By default, an outlier is a data point smaller than 0. There is no
    default upper bound.
    """
    if self.outcome_attribute is None:
        # Fix: 'raise E, msg' is Python-2-only statement syntax.
        raise StandardError("An outcome attribute must be specified for this model. Pass it into the initialization.")
    if self.outcome_attribute.get_alias() not in dataset.get_known_attribute_names():
        try:
            dataset.compute_variables(self.outcome_attribute,
                                      dataset_pool=self.dataset_pool)
        except Exception:
            # Fix: the '%' arguments were orphaned from the message string in
            # the original raise statement; re-attach them.
            raise StandardError(
                "The outcome attribute %s must be a known attribute of the dataset %s."
                % (self.outcome_attribute.get_alias(), dataset.get_dataset_name()))
    if index is None:
        index = arange(dataset.size())
    original_data = dataset.get_attribute_by_index(self.outcome_attribute, index)
    outcome = RegressionModel.run(self, specification, coefficients, dataset,
                                  index,
                                  initial_values=original_data.astype('float32'),
                                  **kwargs)
    initial_error_name = "_init_error_%s" % self.outcome_attribute.get_alias()
    if initial_error_name not in dataset.get_known_attribute_names():
        # First run: compute and persist the initial residuals.
        initial_error = original_data - outcome
        dataset.add_primary_attribute(name=initial_error_name,
                                      data=zeros(dataset.size(), dtype="float32"))
        exclude_missing_values = self.run_config.get(
            "exclude_missing_values_from_initial_error", False)
        exclude_outliers = self.run_config.get(
            "exclude_outliers_from_initial_error", False)
        if exclude_missing_values:
            missing_value = self.run_config.get(
                "outcome_attribute_missing_value", 0)
            initial_error[original_data == missing_value] = 0
            logger.log_status(
                'Values equal %s were excluded from adding residuals.'
                % missing_value)
        if exclude_outliers:
            outlier_low = self.run_config.get("outlier_is_less_than", 0)
            initial_error[original_data < outlier_low] = 0
            # NOTE(review): default upper bound is the data maximum, i.e. no
            # value is excluded from above unless configured.
            outlier_high = self.run_config.get("outlier_is_greater_than",
                                               original_data.max())
            initial_error[original_data > outlier_high] = 0
            logger.log_status(
                'Values less than %s and larger than %s were excluded from adding residuals.'
                % (outlier_low, outlier_high))
        dataset.set_values_of_one_attribute(initial_error_name, initial_error,
                                            index)
    else:
        # Subsequent runs reuse the stored residuals.
        initial_error = dataset.get_attribute_by_index(initial_error_name, index)
    return outcome + initial_error
def estimate(self, specification, dataset,
             outcome_attribute="urbansim.gridcell.logistic_fraction_residential_land",
             index=None,
             procedure="opus_core.estimate_linear_regression",
             data_objects=None, estimate_config=None, debuglevel=0):
    """Estimate the model coefficients.

    Thin wrapper that forwards every argument unchanged to
    RegressionModel.estimate; only the defaults differ.
    """
    parent_estimate = RegressionModel.estimate
    return parent_estimate(self, specification, dataset, outcome_attribute,
                           index, procedure,
                           data_objects=data_objects,
                           estimate_config=estimate_config,
                           debuglevel=debuglevel)
def prepare_for_run(self, dataset_pool,
                    create_proposal_set=True,
                    parcel_filter_for_new_development=None,
                    parcel_filter_for_redevelopment=None,
                    template_filter=None,
                    spec_replace_module_variable_pair=None,
                    proposed_units_variable="urbansim_parcel.development_project_proposal.units_proposed",
                    **kwargs):
    """Create development project proposal dataset from parcels and development templates.

    spec_replace_module_variable_pair is a tuple with two elements: module
    name, variable within the module that contains a dictionary of model
    variables to be replaced in the specification.
    """
    specification, coefficients, dummy = RegressionModel.prepare_for_run(self, **kwargs)
    try:
        existing_proposal_set_parent = dataset_pool.get_dataset('development_project_proposal')
        # Load proposals whose status_id are not id_tentative or id_not_available.
        available_idx = where(logical_and(
            existing_proposal_set_parent.get_attribute("status_id") != DevelopmentProjectProposalDataset.id_tentative,
            existing_proposal_set_parent.get_attribute("status_id") != DevelopmentProjectProposalDataset.id_not_available))[0]
        existing_proposal_set = DatasetSubset(existing_proposal_set_parent, available_idx)
        # Code updated by Hanyi Li, MAG 6/8/2010
        # Replacing the cached 'development_project_proposal' dataset with
        # the filtered dataset 'existing_proposal_set'
        dataset_pool.replace_dataset(existing_proposal_set_parent.get_dataset_name(), existing_proposal_set)
    except:
        # No cached proposal set available; proceed without one.
        existing_proposal_set = None
    parcels = dataset_pool.get_dataset('parcel')
    templates = dataset_pool.get_dataset('development_template')
    # It is important that during this method no variable flushing happens, since
    # we create datasets of the same name for different purposes (new development
    # and redevelopment) and flushing would mix them up.
    flush_variables_current = SessionConfiguration().get('flush_variables', False)
    SessionConfiguration().put_data({'flush_variables': False})
    # Code added by Jesse Ayers, MAG, 9/14/2009
    # Getting an index of parcels that have actively developing projects
    # (those on a velocity function) and making sure that new proposals
    # are not generated for them.
    if existing_proposal_set:
        parcels_with_proposals = existing_proposal_set.get_attribute('parcel_id')
        parcels_with_proposals_idx = parcels.get_id_index(parcels_with_proposals)
        if parcel_filter_for_new_development is not None:
            # If the filter is of the form 'name == expr', embed it whole;
            # otherwise strip everything up to and including '=' first.
            if parcel_filter_for_new_development[parcel_filter_for_new_development.find('=')+1] == '=':
                filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
            else:
                parcel_filter_for_new_development = parcel_filter_for_new_development[parcel_filter_for_new_development.find('=')+1:].lstrip()
                filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
            index1 = where(parcels.compute_variables(filter))[0]
    else:
        if parcel_filter_for_new_development is not None:
            index1 = where(parcels.compute_variables(parcel_filter_for_new_development))[0]
        else:
            index1 = None
    if template_filter is not None:
        try:
            index2 = where(templates.compute_variables(template_filter))[0]
        except Exception, e:
            logger.log_warning("template_filter is set to %s, but there is an error when computing it: %s" % (template_filter, e))
            index2 = None
    # NOTE(review): this method appears truncated in this chunk — index1 and
    # index2 are computed but never used and no return statement is visible;
    # confirm against the full source before relying on this view.
def prepare_for_estimate(self, add_member_prefix=True, specification_dict=None,
                         specification_storage=None, specification_table=None,
                         **kwargs):
    """Prepare the estimation specification for a model-group member.

    If add_member_prefix is True, the group member's prefix is prepended to
    'specification_table' before delegating to
    RegressionModel.prepare_for_estimate.
    Returns whatever RegressionModel.prepare_for_estimate returns.
    """
    if add_member_prefix:
        # NOTE(review): add_member_prefix_to_table_names takes a list and
        # returns a list, which is then passed on as 'specification_table' —
        # confirm the parent method accepts a list here.
        specification_table = self.group_member.add_member_prefix_to_table_names(
            [specification_table])
    # Bug fix: the parent class method was called without 'self', which raises
    # a TypeError (unbound method called without an instance in Python 2).
    return RegressionModel.prepare_for_estimate(
        self,
        specification_dict=specification_dict,
        specification_storage=specification_storage,
        specification_table=specification_table,
        **kwargs)
def run(self, specification, coefficients, dataset, index=None,
        chunk_specification=None, data_objects=None, run_config=None,
        debuglevel=0):
    """Predict household income and write it back to 'dataset'.

    Before the regression runs, households that the household formation /
    dissolution models flagged with a placeholder income of -N (where N is
    the household size, 1..5) get an initial income from a size-specific
    linear equation in number of workers, mean education and mean age; any
    remaining negative incomes are zeroed.  The regression predicts
    log(income), so the result is exponentiated before being stored.  Finally
    negative predictions are clipped to 0 and predictions above 5,000,000
    are capped.

    For info on the arguments see RegressionModel.  Returns None (or the raw
    regression result when it is None/empty).
    """
    if data_objects is not None:
        self.dataset_pool.add_datasets_if_not_included(data_objects)
    # Fix: '<>' is Python-2-only syntax; 'is not None' is the correct test.
    if self.filter_attribute is not None:
        res = Resources({"debug": debuglevel})
        index = dataset.get_filtered_index(self.filter_attribute, threshold=0,
                                           index=index,
                                           dataset_pool=self.dataset_pool,
                                           resources=res)
    # (household size, workers coef., mean-education coef., mean-age coef.,
    # constant) for initializing incomes of brand new households, which the
    # hh-formation/dissolution models flag with income == -size.  The order
    # matches the original code (sizes 2, 1, 3, 4, 5); the flags are
    # mutually exclusive so order does not affect the result.
    initial_income_equations = [
        (2, 18593, 11293, 889, 95508),
        (1, 24000, 5590, 583, 51957),
        (3, 20078, 8531, 861, 72319),
        (4, 21883, 9656, 1806, 112131),
        (5, 8797, 9049, 670, 27224),
    ]
    for hh_size, workers_coef, edu_coef, age_coef, constant in initial_income_equations:
        flagged = dataset.compute_variables('(household.income==(-%d))' % hh_size)
        idx_new = where(flagged == 1)[0]
        if idx_new.size > 0:
            expression = ('(((household.workers)*%d) + '
                          '((household.aggregate(person.education, function=mean))*%d) + '
                          '((household.aggregate(person.age, function=mean))*%d) - %d)'
                          % (workers_coef, edu_coef, age_coef, constant))
            dataset.modify_attribute('income',
                                     dataset.compute_variables(expression)[idx_new],
                                     idx_new)
    # Any income still negative is zeroed before the regression input is built.
    negative_income = dataset.compute_variables('household.income < 0')
    index_neg_inc = where(negative_income == 1)[0]
    if index_neg_inc.size > 0:
        dataset.modify_attribute('income',
                                 zeros(index_neg_inc.size, dtype="int32"),
                                 index_neg_inc)
    # Run regression model - all coefficients are applied here except macro
    # employment growth (see note below).
    incomes = RegressionModel.run(self, specification, coefficients, dataset,
                                  index, chunk_specification,
                                  run_config=run_config, debuglevel=debuglevel)
    # NOTE(review): a disabled block that added a macro employment-growth term
    # (this year's jobs / last year's jobs from the annual employment control
    # totals) was removed here; see version control history to revive it.
    # Fix: guard against a None/empty result BEFORE exponentiating — the
    # original called exp(incomes) first, which crashes when incomes is None.
    if (incomes is None) or (incomes.size <= 0):
        return incomes
    # The regression predicts log(income); transform back to dollars.
    incomes = exp(incomes)
    if index is None:
        index = arange(dataset.size())
    dataset.set_values_of_one_attribute("income", incomes, index)
    # Bump up all negative incomes to zero.
    negative_income = dataset.compute_variables('household.income < 0')
    index_neg_inc = where(negative_income == 1)[0]
    if index_neg_inc.size > 0:
        dataset.modify_attribute('income',
                                 zeros(index_neg_inc.size, dtype="int32"),
                                 index_neg_inc)
    # Cap all incomes above 5,000,000 (the original comment said "3 million",
    # but the code has always used 5,000,000).
    too_high_income = dataset.compute_variables('household.income > 5000000')
    index_too_high_income = where(too_high_income == 1)[0]
    if index_too_high_income.size > 0:
        dataset.modify_attribute('income',
                                 array(index_too_high_income.size * [5000000]),
                                 index_too_high_income)
    return
def prepare_for_run(self, dataset_pool,
                    create_proposal_set=True,
                    parcel_filter_for_new_development=None,
                    parcel_filter_for_redevelopment=None,
                    template_filter=None,
                    spec_replace_module_variable_pair=None,
                    proposed_units_variable="urbansim_parcel.development_project_proposal.units_proposed",
                    **kwargs):
    """Create development project proposal dataset from parcels and development templates.

    spec_replace_module_variable_pair is a tuple with two elements: module
    name, variable within the module that contains a dictionary of model
    variables to be replaced in the specification.
    """
    specification, coefficients, dummy = RegressionModel.prepare_for_run(
        self, **kwargs)
    try:
        existing_proposal_set_parent = dataset_pool.get_dataset(
            'development_project_proposal')
        # Load proposals whose status_id are not id_tentative or id_not_available.
        available_idx = where(
            logical_and(
                existing_proposal_set_parent.get_attribute("status_id") !=
                DevelopmentProjectProposalDataset.id_tentative,
                existing_proposal_set_parent.get_attribute("status_id") !=
                DevelopmentProjectProposalDataset.id_not_available))[0]
        existing_proposal_set = DatasetSubset(existing_proposal_set_parent,
                                              available_idx)
        # Code updated by Hanyi Li, MAG 6/8/2010
        # Replacing the cached 'development_project_proposal' dataset with
        # the filtered dataset 'existing_proposal_set'
        dataset_pool.replace_dataset(
            existing_proposal_set_parent.get_dataset_name(),
            existing_proposal_set)
    except:
        # No cached proposal set available; proceed without one.
        existing_proposal_set = None
    parcels = dataset_pool.get_dataset('parcel')
    templates = dataset_pool.get_dataset('development_template')
    # It is important that during this method no variable flushing happens, since
    # we create datasets of the same name for different purposes (new development
    # and redevelopment) and flushing would mix them up.
    flush_variables_current = SessionConfiguration().get(
        'flush_variables', False)
    SessionConfiguration().put_data({'flush_variables': False})
    # Code added by Jesse Ayers, MAG, 9/14/2009
    # Getting an index of parcels that have actively developing projects
    # (those on a velocity function) and making sure that new proposals
    # are not generated for them.
    if existing_proposal_set:
        parcels_with_proposals = existing_proposal_set.get_attribute(
            'parcel_id')
        parcels_with_proposals_idx = parcels.get_id_index(
            parcels_with_proposals)
        if parcel_filter_for_new_development is not None:
            # If the filter is of the form 'name == expr', embed it whole;
            # otherwise strip everything up to and including '=' first.
            if parcel_filter_for_new_development[
                    parcel_filter_for_new_development.find('=') + 1] == '=':
                filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
            else:
                parcel_filter_for_new_development = parcel_filter_for_new_development[
                    parcel_filter_for_new_development.find('=') + 1:].lstrip()
                filter = 'flter = numpy.logical_and(parcel.number_of_agents(development_project_proposal) == 0, %s)' % parcel_filter_for_new_development
            index1 = where(parcels.compute_variables(filter))[0]
    else:
        if parcel_filter_for_new_development is not None:
            index1 = where(
                parcels.compute_variables(
                    parcel_filter_for_new_development))[0]
        else:
            index1 = None
    if template_filter is not None:
        try:
            index2 = where(templates.compute_variables(template_filter))[0]
        except Exception, e:
            logger.log_warning(
                "template_filter is set to %s, but there is an error when computing it: %s"
                % (template_filter, e))
            index2 = None
    # NOTE(review): this is a duplicate of the prepare_for_run method earlier
    # in the file and likewise appears truncated in this chunk — index1 and
    # index2 are computed but never used and no return statement is visible;
    # confirm against the full source before relying on this view.
# NOTE(review): fragment of test/demo code — the statement it continues (an
# HLCM model construction whose keyword arguments open this line) lies outside
# this chunk, so the code is left byte-identical and cannot be reformatted
# into a standalone valid block.
compute_capacity_flag=True, capacity_string=vacant_capacity, number_of_agents_string=number_of_agents, number_of_units_string="capacity", run_config={"lottery_max_iterations":10}) seed(1) result = hlcm2.run(specification, coefficients, households) #coef, results = hlcm2.estimate(specification, agent_set=households, debuglevel=1) #hlcm2.plot_choice_histograms(capacity=locations.get_attribute("capacity")) households.get_attribute("location") # Regression model locations.add_primary_attribute(name="distance_to_cbd", data=[5,10,5,1,20,0,7,7,3]) from opus_core.regression_model import RegressionModel rm = RegressionModel(regression_procedure="opus_core.linear_regression") specification = EquationSpecification( variables=array(["constant", "gridcell.distance_to_cbd"]), coefficients=array(["constant", "dcbd_coef"])) coef, other_results = rm.estimate(specification, dataset=locations, outcome_attribute="gridcell.cost", procedure="opus_core.estimate_linear_regression") coef.summary() dstorage = StorageFactory().get_storage('dict_storage') dstorage.write_table( table_name = 'gridcells', table_data = {'id':array([1,2,3,4]), 'distance_to_cbd':array([2,4,6,8]) })
def prepare_for_run(self, *args, **kwargs):
    """Prepare exactly as RegressionModel does, but expose only the
    specification and coefficients; the third returned element is dropped."""
    specification, coefficients, _ = RegressionModel.prepare_for_run(
        self, *args, **kwargs)
    return (specification, coefficients)