Example no. 1
0
    def generate_posterior_distribution(
            self,
            year,
            quantity_of_interest,
            cache_directory=None,
            values=None,
            ids=None,
            procedure="opus_core.bm_normal_posterior",
            use_bias_and_variance_from=None,
            transformed_back=True,
            transformation_pair=(None, None),
            **kwargs):
        """Simulate and return the posterior distribution of 'quantity_of_interest' for 'year'.

        Either 'cache_directory' or both 'values' and 'ids' must be given,
        otherwise a StandardError is raised. 'procedure' names the model
        component whose run() method draws the simulated values; self is
        passed as its first argument and **kwargs are forwarded to it.
        If 'transformed_back' is True and a transformation was used for the
        prediction, the simulated values are transformed back using the
        second element of the transformation pair. The result is also
        stored in self.simulated_values.
        """
        if cache_directory is not None:
            self.cache_set = array([cache_directory])
        elif values is None or ids is None:
            # Exception-call form instead of the Python-2-only
            # "raise E, msg" comma syntax (a SyntaxError under Python 3).
            raise StandardError("Either cache_directory or values and ids must be given.")

        self.set_posterior(
            year,
            quantity_of_interest,
            values=values,
            ids=ids,
            use_bias_and_variance_from=use_bias_and_variance_from,
            transformation_pair=transformation_pair)
        procedure_class = ModelComponentCreator().get_model_component(
            procedure)
        self.simulated_values = procedure_class.run(self, **kwargs)
        if transformed_back and (self.transformation_pair_for_prediction[0]
                                 is not None):  # need to transform back
            self.simulated_values = try_transformation(
                self.simulated_values,
                self.transformation_pair_for_prediction[1])
        return self.simulated_values
Example no. 2
0
    def generate_posterior_distribution(
            self,
            year,
            quantity_of_interest,
            procedure="opus_core.bm_normal_posterior",
            use_bias_and_variance_from=None,
            transformed_back=True,
            aggregate_to=None,
            intermediates=None,
            **kwargs):
        """
        'quantity_of_interest' is a variable name about which we want to get the posterior distribution.
        If there is multiple known_output, it must be made clear from which one the bias and variance
        is to be used (argument use_bias_and_variance_from). If it is None, the first known output is used.
        If 'aggregate_to' is given, the simulated values are aggregated to that dataset,
        passing through the datasets listed in 'intermediates' (defaults to none).
        """
        # None sentinel instead of a literal [] default: a mutable default
        # argument would be shared across all calls of this method.
        if intermediates is None:
            intermediates = []
        self.set_posterior(year, quantity_of_interest,
                           use_bias_and_variance_from)
        procedure_class = ModelComponentCreator().get_model_component(
            procedure)
        self.simulated_values = procedure_class.run(self, **kwargs)
        if transformed_back and (self.transformation_pair_for_prediction[0]
                                 is not None):  # need to transform back
            self.simulated_values = try_transformation(
                self.simulated_values,
                self.transformation_pair_for_prediction[1])
        if aggregate_to is not None:
            self.simulated_values = self.aggregate(
                self.simulated_values,
                aggregate_from=VariableName(
                    quantity_of_interest).get_dataset_name(),
                aggregate_to=aggregate_to,
                intermediates=intermediates)

        return self.simulated_values
Example no. 3
0
 def compute_weights(self, procedure="opus_core.bm_normal_weights", **kwargs):
     """Compute, persist and return the BM weights.

     Launches the run method of the given 'procedure' component, passing
     self in as its first argument; the component should return the actual
     BM weights (and their components). When 'procedure' is None, equal
     weights are returned instead. In either case the weights are written
     to the weights file in the output directory.
     """
     # Prepare the statistics the weighting procedure relies on.
     self.compute_y()
     self.estimate_mu()
     self.estimate_bias()
     self.estimate_variance()
     if procedure is None:
         # No procedure given: every run gets the same weight.
         self.weights = 1.0/self.number_of_runs * ones(self.number_of_runs)
     else:
         component = ModelComponentCreator().get_model_component(procedure)
         self.weights, self.weight_components = component.run(self, **kwargs)
     write_to_text_file(os.path.join(self.output_directory, self.weights_file_name),
                        self.weights)
     return self.weights
Example no. 4
0
    def generate_posterior_distribution(self, year, quantity_of_interest, cache_directory=None, values=None, ids=None, 
                                        procedure="opus_core.bm_normal_posterior", use_bias_and_variance_from=None, 
                                        transformed_back=True, aggregate_to=None,
                                        intermediates=None, propagation_factor=1, no_propagation=True, additive_propagation=False, 
                                        omit_bias=False, **kwargs):
        """Simulate and return the posterior distribution of 'quantity_of_interest' for 'year'.

        'values' and 'ids' must be given if the BM object was initialized without
        a cache file location. 'procedure' names the model component whose run()
        method draws the simulated values (self is passed as its first argument,
        **kwargs are forwarded). If 'transformed_back' is True and a transformation
        was used for the prediction, the result is transformed back. If
        'aggregate_to' is given, the values are aggregated to that dataset via the
        datasets in 'intermediates' (defaults to none). The result is also stored
        in self.simulated_values, with matching ids in self.simulated_values_ids.
        """
        # None sentinel instead of a literal [] default: a mutable default
        # argument would be shared across all calls of this method.
        if intermediates is None:
            intermediates = []
        if (values is None or ids is None) and (self.cache_set is None):
            # Exception-call form instead of the Python-2-only comma syntax;
            # message typo ("give") also fixed.
            raise StandardError("values and ids must be given if the BM object is initialized without cache_file_location.")

        self.set_posterior(year, quantity_of_interest, values=values, ids=ids, use_bias_and_variance_from=use_bias_and_variance_from, 
                           propagation_factor=propagation_factor, 
                           no_propagation=no_propagation, additive_propagation=additive_propagation, omit_bias=omit_bias)
        procedure_class = ModelComponentCreator().get_model_component(procedure)
        self.simulated_values = procedure_class.run(self, **kwargs)
        if transformed_back and (self.transformation_pair_for_prediction[0] is not None): # need to transform back
            self.simulated_values = try_transformation(self.simulated_values,
                                                       self.transformation_pair_for_prediction[1])
        self.simulated_values_ids = self.m_ids
        if aggregate_to is not None:
            (self.simulated_values, self.simulated_values_ids) = self.aggregate(self.simulated_values, 
                                                   aggregate_from=VariableName(quantity_of_interest).get_dataset_name(),
                                                   aggregate_to=aggregate_to, intermediates=intermediates)
        return self.simulated_values
Example no. 5
0
    def estimate(self,
                 specification,
                 dataset,
                 outcome_attribute,
                 index=None,
                 procedure=None,
                 data_objects=None,
                 estimate_config=None,
                 debuglevel=0):
        """'specification' is of type EquationSpecification,
            'dataset' is of type Dataset,
            'outcome_attribute' - string that determines the dependent variable,
            'index' are indices of individuals in dataset for which
                    the model runs. If it is None, the whole dataset is considered.
            'procedure' - name of the estimation procedure. If it is None,
                there should be an entry "estimation" in 'estimate_config' that determines the procedure. The class
                must have a method 'run' that takes as arguments 'data', 'regression_procedure' and 'resources'.
                It returns a dictionary with entries 'estimators', 'standard_errors' and 't_values' (all 1D numpy arrays).
            'data_objects' is a dictionary where each key is the name of an data object
                    ('zone', ...) and its value is an object of class  Dataset.
            'estimate_config' is of type Resources, it gives additional arguments for the estimation procedure.
            'debuglevel' overwrites the class 'debuglevel'.

            Returns a tuple (coefficients, estimated_coef) where 'estimated_coef'
            is a dictionary of raw per-submodel results, or (None, None) when
            there is nothing to estimate.
        """
        self.debug.flag = debuglevel
        # PEP 8: compare to the None singleton with 'is', not '=='.
        if estimate_config is None:
            estimate_config = Resources()
        if not isinstance(estimate_config, Resources) and isinstance(
                estimate_config, dict):
            estimate_config = Resources(estimate_config)
        self.estimate_config = estimate_config.merge_with_defaults(
            self.estimate_config)
        if data_objects is not None:
            self.dataset_pool.add_datasets_if_not_included(data_objects)
        self.procedure = procedure
        if self.procedure is None:
            self.procedure = self.estimate_config.get("estimation", None)
        if self.procedure is not None:
            self.procedure = ModelComponentCreator().get_model_component(
                self.procedure)
        else:
            logger.log_warning(
                "No estimation procedure given, or problems with loading the corresponding module."
            )

        compute_resources = Resources({"debug": self.debug})
        if dataset.size() <= 0:  # no data loaded yet
            dataset.get_id_attribute()
        if index is None:
            index = arange(dataset.size())
        if not isinstance(index, ndarray):
            index = array(index)

        estimation_size_agents = self.estimate_config.get(
            "estimation_size_agents",
            None)  # should be a proportion of the agent_set
        if estimation_size_agents is None:
            estimation_size_agents = 1.0
        else:
            estimation_size_agents = max(min(estimation_size_agents, 1.0),
                                         0.0)  # between 0 and 1

        if estimation_size_agents < 1.0:
            self.debug.print_debug("Sampling agents for estimation ...", 3)
            estimation_idx = sample_noreplace(
                arange(index.size), int(index.size * estimation_size_agents))
        else:
            estimation_idx = arange(index.size)

        estimation_idx = index[estimation_idx]
        self.debug.print_debug(
            "Number of observations for estimation: " +
            str(estimation_idx.size), 2)
        if estimation_idx.size <= 0:
            self.debug.print_debug("Nothing to be done.", 2)
            return (None, None)

        coefficients = create_coefficient_from_specification(specification)
        specified_coefficients = SpecifiedCoefficients().create(coefficients,
                                                                specification,
                                                                neqs=1)
        submodels = specified_coefficients.get_submodels()
        self.get_status_for_gui().update_pieces_using_submodels(
            submodels=submodels, leave_pieces=2)
        self.map_agents_to_submodels(
            submodels,
            self.submodel_string,
            dataset,
            estimation_idx,
            dataset_pool=self.dataset_pool,
            resources=compute_resources,
            submodel_size_max=self.estimate_config.get('submodel_size_max',
                                                       None))
        variables = specified_coefficients.get_full_variable_names_without_constants(
        )
        self.debug.print_debug("Compute variables ...", 4)
        self.increment_current_status_piece()
        dataset.compute_variables(variables,
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)

        coef = {}
        estimated_coef = {}
        self.outcome = {}
        dataset.compute_variables([outcome_attribute],
                                  dataset_pool=self.dataset_pool,
                                  resources=compute_resources)
        regression_resources = Resources(estimate_config)
        regression_resources.merge({"debug": self.debug})
        outcome_variable_name = VariableName(outcome_attribute)
        for submodel in submodels:
            coef[submodel] = SpecifiedCoefficientsFor1Submodel(
                specified_coefficients, submodel)
            self.increment_current_status_piece()
            logger.log_status("Estimate regression for submodel " +
                              str(submodel),
                              tags=["estimate"],
                              verbosity_level=2)
            logger.log_status("Number of observations: " +
                              str(self.observations_mapping[submodel].size),
                              tags=["estimate"],
                              verbosity_level=2)
            self.data[
                submodel] = dataset.create_regression_data_for_estimation(
                    coef[submodel],
                    index=estimation_idx[self.observations_mapping[submodel]])
            self.coefficient_names[submodel] = coef[
                submodel].get_coefficient_names_without_constant()[0, :]
            if (self.data[submodel].shape[0] > 0
                ) and (self.data[submodel].size > 0) and (
                    self.procedure
                    is not None):  # observations for this submodel available
                self.outcome[submodel] = dataset.get_attribute_by_index(
                    outcome_variable_name.get_alias(),
                    estimation_idx[self.observations_mapping[submodel]])
                regression_resources.merge({"outcome": self.outcome[submodel]})
                regression_resources.merge({
                    "coefficient_names":
                    self.coefficient_names[submodel].tolist(),
                    "constant_position":
                    coef[submodel].get_constants_positions()
                })
                estimated_coef[submodel] = self.procedure.run(
                    self.data[submodel],
                    self.regression,
                    resources=regression_resources)
                # Membership tests on the dict directly - no .keys() needed.
                if "estimators" in estimated_coef[submodel]:
                    coef[submodel].set_coefficient_values(
                        estimated_coef[submodel]["estimators"])
                if "standard_errors" in estimated_coef[submodel]:
                    coef[submodel].set_standard_errors(
                        estimated_coef[submodel]["standard_errors"])
                if "other_measures" in estimated_coef[submodel]:
                    for measure in estimated_coef[submodel]["other_measures"]:
                        coef[submodel].set_measure(
                            measure, estimated_coef[submodel]["other_measures"]
                            [measure])
                if "other_info" in estimated_coef[submodel]:
                    for info in estimated_coef[submodel]["other_info"]:
                        coef[submodel].set_other_info(
                            info, estimated_coef[submodel]["other_info"][info])
        coefficients.fill_coefficients(coef)

        self.save_predicted_values_and_errors(specification,
                                              coefficients,
                                              dataset,
                                              outcome_variable_name,
                                              index=index,
                                              data_objects=data_objects)

        return (coefficients, estimated_coef)