Esempio n. 1
0
    def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
            data_objects=None, run_config=None, debuglevel=0, **kwargs):
        """Run the regression and store the predicted outcome on ``dataset``.

        For info on the arguments see RegressionModel.

        If ``self.filter_attribute`` is set, ``index`` is first narrowed with
        ``dataset.get_filtered_index`` (threshold 0).  The parent ``run`` is then
        called with that index.  When the alias of the outcome attribute starts
        with ``ln_`` the results are exponentiated and stored under the alias
        without that prefix; otherwise they are stored under the alias itself.
        Only the elements at ``index`` are overwritten: the remaining elements
        keep their previous values, or are zero if the attribute did not exist.

        Returns the outcome array (exponentiated where applicable), or the
        parent's result unchanged when it is None or empty.
        """
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if self.filter_attribute is not None:
            res = Resources({"debug": debuglevel})
            index = dataset.get_filtered_index(self.filter_attribute, threshold=0, index=index,
                                               dataset_pool=self.dataset_pool, resources=res)
        outcome = RegressionModelWithAdditionInitialResiduals.run(self, specification, coefficients, dataset,
                                                                 index, chunk_specification=chunk_specification,
                                                                 run_config=run_config, debuglevel=debuglevel,
                                                                 **kwargs)
        # Identity checks: "== None" on an array compares elementwise and makes
        # the surrounding "if" ambiguous.
        if outcome is None or outcome.size <= 0:
            return outcome
        if index is None:
            index = arange(dataset.size())

        alias = self.outcome_attribute.get_alias()
        if alias.startswith("ln_"):
            # The model was fitted on the log of the attribute: strip the
            # prefix and transform the prediction back.
            outcome_attribute_name = alias[3:]
            outcome = exp(outcome)
        else:
            outcome_attribute_name = alias

        if outcome_attribute_name in dataset.get_known_attribute_names():
            # Work on a copy and re-add the attribute as primary below.
            values = dataset.get_attribute(outcome_attribute_name).copy()
            dataset.delete_one_attribute(outcome_attribute_name)
        else:
            values = zeros(dataset.size(), dtype='f')

        values[index] = outcome.astype(values.dtype)
        dataset.add_primary_attribute(name=outcome_attribute_name, data=values)
        self.correct_infinite_values(dataset, outcome_attribute_name, clip_all_larger_values=True)
        return outcome
Esempio n. 2
0
 def prepare_for_estimate(self, specification_dict = None, specification_storage=None,
                            specification_table=None, dataset=None,
                            filter_variable="unit_price",
                            threshold=0):
     """Delegate to the parent's ``prepare_for_estimate``.

     ``filter_variable`` is forwarded as ``dataset_filter`` and ``threshold``
     as ``filter_threshold``; the specification arguments and ``dataset`` are
     passed through unchanged.  Returns whatever the parent returns.
     """
     parent_prepare = RegressionModelWithAdditionInitialResiduals.prepare_for_estimate
     return parent_prepare(
         self,
         specification_dict=specification_dict,
         specification_storage=specification_storage,
         specification_table=specification_table,
         dataset=dataset,
         dataset_filter=filter_variable,
         filter_threshold=threshold,
     )
Esempio n. 3
0
 def estimate(self, specification, dataset, outcome_attribute="unit_price", index = None,
                     procedure="opus_core.estimate_linear_regression", data_objects=None,
                     estimate_config=None,  debuglevel=0):
     """Estimate the model, optionally restricted by ``self.filter_attribute``.

     ``data_objects``, when given, are added to ``self.dataset_pool``.  If
     ``self.filter_attribute`` is set, ``index`` is narrowed with
     ``dataset.get_filtered_index`` (threshold 0) before the parent's
     ``estimate`` is called.  Returns whatever the parent returns.
     """
     if data_objects is not None:
         self.dataset_pool.add_datasets_if_not_included(data_objects)
     # "is not None" replaces the Python-2-only "<>" operator.
     if self.filter_attribute is not None:
         res = Resources({"debug": debuglevel})
         index = dataset.get_filtered_index(self.filter_attribute, threshold=0, index=index,
                                            dataset_pool=self.dataset_pool, resources=res)
     return RegressionModelWithAdditionInitialResiduals.estimate(self, specification, dataset, outcome_attribute,
                                                                 index, procedure,
                                                                 estimate_config=estimate_config,
                                                                 debuglevel=debuglevel)
Esempio n. 4
0
 def __init__(self, regression_procedure="opus_core.linear_regression",
              filter_attribute=None,
              submodel_string="building_type_id",
              outcome_attribute = "unit_price",
              run_config=None,
              estimate_config=None,
              debuglevel=0,
              model_name=None,
              model_short_name=None,
              dataset_pool=None):
     """Store the filter attribute and initialise the parent regression model.

     ``model_name`` and ``model_short_name`` are assigned on the instance only
     when they are not None.  All remaining arguments except
     ``filter_attribute`` are forwarded to the parent constructor.
     """
     self.filter_attribute = filter_attribute
     if model_name is not None:
         self.model_name = model_name
     if model_short_name is not None:
         self.model_short_name = model_short_name

     parent_options = {
         "regression_procedure": regression_procedure,
         "submodel_string": submodel_string,
         "outcome_attribute": outcome_attribute,
         "run_config": run_config,
         "estimate_config": estimate_config,
         "debuglevel": debuglevel,
         "dataset_pool": dataset_pool,
     }
     RegressionModelWithAdditionInitialResiduals.__init__(self, **parent_options)
Esempio n. 5
0
    def run(self, specification, coefficients, dataset, index=None, chunk_specification=None,
            years = 4,
            data_objects=None, run_config=None, debuglevel=0):
        """Run the regression, add a random error term and store the outcome.

        For info on the arguments see RegressionModel.

        ``years`` divides the random error term, converting a lump prediction
        to an annual prediction.

        Steps:
          1. Narrow ``index`` by ``self.filter_attribute`` if one is set.
          2. Call the parent ``run`` and subtract the stored initial error
             (attribute ``_init_error_<alias>``) to obtain the mean.
          3. Draw an error term scaled by the ``rmse_ln_emp_ratio`` variable,
             store it as attribute ``_epsilon_<alias>`` and add it to the mean.
          4. Exponentiate when the alias starts with ``ln_`` and write the
             result to the dataset at ``index``.

        Returns the outcome array, or None when the parent returns None.
        """
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        if self.filter_attribute is not None:
            res = Resources({"debug": debuglevel})
            index = dataset.get_filtered_index(self.filter_attribute, threshold=0, index=index,
                                               dataset_pool=self.dataset_pool, resources=res)
        init_outcome = RegressionModelWithAdditionInitialResiduals.run(self, specification, coefficients, dataset,
                                         index, chunk_specification=chunk_specification,
                                         run_config=run_config, debuglevel=debuglevel)
        # Guard before any arithmetic: subtracting from None would raise.
        if init_outcome is None:
            return init_outcome
        # Resolve the default before indexing; indexing an array with None
        # inserts a new axis instead of selecting every element.
        if index is None:
            index = arange(dataset.size())

        alias = self.outcome_attribute.get_alias()
        initial_error_name = "_init_error_%s" % alias
        initial_error = dataset[initial_error_name][index]
        mean = init_outcome - initial_error

        rmse = dataset.compute_variables("paris.establishment.rmse_ln_emp_ratio")
        # Assuming ``norm`` is scipy.stats.norm: its rvs() names the location
        # parameter ``loc``.
        _epsilon = norm.rvs(loc=0, scale=rmse) / years  # convert lump prediction to annual prediction
        _epsilon_name = "_epsilon_%s" % alias

        if _epsilon_name not in dataset.get_known_attribute_names():
            dataset.add_primary_attribute(name=_epsilon_name, data=zeros(dataset.size(), dtype="float32"))
        # NOTE(review): the full-length _epsilon is passed together with index;
        # confirm set_values_of_one_attribute expects that rather than _epsilon[index].
        dataset.set_values_of_one_attribute(_epsilon_name, _epsilon, index)
        outcome = mean + _epsilon[index]

        if outcome.size <= 0:
            return outcome

        if alias.startswith("ln_"):
            # The model was fitted on the log of the attribute: strip the
            # prefix and transform the prediction back.
            outcome_attribute_name = alias[3:]
            outcome = exp(outcome)
        else:
            outcome_attribute_name = alias

        if outcome_attribute_name in dataset.get_known_attribute_names():
            # Work on a copy and re-add the attribute as primary below.
            values = dataset.get_attribute(outcome_attribute_name).copy()
            dataset.delete_one_attribute(outcome_attribute_name)
        else:
            values = zeros(dataset.size(), dtype='f')

        values[index] = outcome.astype(values.dtype)
        dataset.add_primary_attribute(name=outcome_attribute_name, data=values)
        self.correct_infinite_values(dataset, outcome_attribute_name, clip_all_larger_values=True)
        return outcome
Esempio n. 6
0
 def prepare_for_run(self, *args, **kwargs):
     """Return the first two items of the parent's ``prepare_for_run`` result.

     All arguments are forwarded unchanged.  The parent's result is unpacked
     into three items and the third is discarded.
     """
     parent_result = RegressionModelWithAdditionInitialResiduals.prepare_for_run(self, *args, **kwargs)
     specification, coefficients, _unused = parent_result
     return (specification, coefficients)