def simulate_from_normal(self, variable, n=1, bias=0, sd=1, transformation_pair=(None, None)):
    """Simulate n values from the normal distribution N(v + bias, sd^2) for each value v of
    self.values_from_mr for the given variable, where v is self.values_from_mr possibly
    transformed by the first element of transformation_pair. The second element of the pair
    is applied to the results. The resulting array has as many rows as self.values_from_mr;
    its number of columns equals the number of columns of self.values_from_mr times n.
    """
    if n < 1:
        return None
    values = try_transformation(self.values_from_mr[variable], transformation_pair[0])
    #print self.values_from_mr[variable]
    #print values
    result = normal(values + bias, sd, size=self.values_from_mr[variable].shape)
    for i in range(1, n):
        # append another block of draws column-wise
        result = concatenate(
            (result, normal(values + bias, sd, size=self.values_from_mr[variable].shape)),
            axis=1)
    #print result
    #print try_transformation(result, transformation_pair[1])
    return try_transformation(result, transformation_pair[1])
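# The following standalone sketch (not part of the original module) illustrates the
# column-concatenation pattern used by simulate_from_normal with plain numpy; the
# array 'values' and the bias/sd numbers are made up for illustration.
import numpy

values = numpy.array([[1.0, 2.0], [3.0, 4.0]])        # stands in for a transformed values_from_mr entry
draws = numpy.random.normal(values + 0.5, 1.0, size=values.shape)
for _ in range(1, 3):                                  # n = 3 draws per value in total
    draws = numpy.concatenate(
        (draws, numpy.random.normal(values + 0.5, 1.0, size=values.shape)), axis=1)
# draws.shape == (2, 6): same number of rows, columns multiplied by n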
def _run_stochastic_test_poisson(self, function, expected_results, number_of_iterations,
                                 significance_level=0.01, transformation=None):
    """Run the given function for the specified number_of_iterations.
    Uses a Poisson likelihood-ratio test to determine whether the produced results
    are within the specified significance_level of the expected_results.
    """
    K = expected_results.size
    sum_y = zeros(K, dtype=float32)
    x_kr = zeros((number_of_iterations, K), dtype=float32)
    for i in range(number_of_iterations):
        y_r = function()
        x_kr[i, :] = try_transformation(y_r, transformation)
        sum_y = sum_y + x_kr[i, :]
    lambdak = sum_y / float(number_of_iterations)
    lambdanull = try_transformation(expected_results.astype(float32), transformation)
    #print lambdak
    #print lambdanull
    sumxk = sum(x_kr, axis=0)
    LRTS = 2.0 * ((number_of_iterations * (lambdanull - lambdak).sum()) +
                  (ln(lambdak / ma.masked_where(lambdanull == 0, lambdanull)) * sumxk).sum())
    prob = chisqprob(LRTS, K)
    #print LRTS, prob
    logger.log_status("Stochastic Test Poisson: LRTS=" + str(LRTS) +
                      ", df=" + str(K) + ", p=" + str(prob))
    return (prob >= significance_level,
            "prob=%f < significance level of %f" % (prob, significance_level))
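# Standalone sketch (not taken from the module) of the likelihood-ratio statistic
# computed above. With x_kr ~ Poisson(lambda_k) independent over runs r=1..R and
# components k=1..K, the statistic compared against a chi-square with K degrees of
# freedom is
#   LRTS = 2 * [ R * sum_k (lambda0_k - lambdahat_k)
#                + sum_k ln(lambdahat_k / lambda0_k) * sum_r x_kr ].
# The data below are made up; scipy's chi2.sf plays the role of chisqprob.
import numpy
from scipy.stats import chi2

x_kr = numpy.array([[4.0, 9.0, 2.0],
                    [6.0, 12.0, 3.0]])               # R=2 runs, K=3 components
lambda0 = numpy.array([5.0, 10.0, 2.5])              # expected (null) rates
R, K = x_kr.shape
lambdahat = x_kr.mean(axis=0)                        # per-component MLE of the rate
LRTS = 2.0 * (R * (lambda0 - lambdahat).sum()
              + (numpy.log(lambdahat / lambda0) * x_kr.sum(axis=0)).sum())
p_value = chi2.sf(LRTS, K)                           # reject if p_value < significance_level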
def compute_stochastic_test_normal(self, function, expected_results, number_of_iterations,
                                   significance_level=0.01, transformation="sqrt"):
    K = expected_results.size
    sum_y = zeros(K, dtype=float32)
    x_kr = zeros((number_of_iterations, K), dtype=float32)
    texpected_results = try_transformation(expected_results, transformation)
    for i in range(number_of_iterations):
        y_r = function()
        x_kr[i, :] = try_transformation(y_r, transformation)
        sum_y = sum_y + x_kr[i, :]
    muest = sum_y / float(number_of_iterations)
    # variance estimate under the alternative (around the per-component sample means)
    sigma_1 = ((x_kr - muest)**2.0).sum() / float(number_of_iterations * K)
    # per-component variance across iterations (labels mark the columns of x_kr)
    self.variance = variance(x_kr,
                             labels=reshape(array(number_of_iterations * range(1, K + 1)),
                                            (number_of_iterations, K)),
                             index=arange(K) + 1)
    # variance estimate under the null (around the transformed expected results)
    sigma_0 = ((x_kr - texpected_results)**2.0).sum() / float(number_of_iterations * K)
    LRTS = number_of_iterations * K * log(sigma_0 / sigma_1)
    prob = chisqprob(LRTS, K)
    return (K, LRTS, prob)
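# Standalone sketch (not taken from the module) of the statistic computed above.
# Assuming x_kr ~ N(mu_k, sigma^2) with a common variance, sigma_1 is the variance
# MLE around the per-component sample means (the alternative) and sigma_0 is the
# variance MLE around the transformed expected results (the null); then
#   LRTS = R * K * ln(sigma_0 / sigma_1)
# is compared against a chi-square distribution with K degrees of freedom.
# Data below are made up; scipy's chi2.sf plays the role of chisqprob.
import numpy
from scipy.stats import chi2

x_kr = numpy.array([[1.1, 2.4, 2.9],
                    [0.9, 1.8, 3.3]])                # R=2 runs, K=3 components
expected = numpy.array([1.0, 2.0, 3.0])              # (already transformed) expected results
R, K = x_kr.shape
mu_hat = x_kr.mean(axis=0)
sigma_1 = ((x_kr - mu_hat) ** 2).sum() / float(R * K)
sigma_0 = ((x_kr - expected) ** 2).sum() / float(R * K)
LRTS = R * K * numpy.log(sigma_0 / sigma_1)
p_value = chi2.sf(LRTS, K)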
def estimate_mu(self):
    iout = -1
    self.values_from_mr = {}
    for quantity in self.observed_data.get_quantity_objects():
        dataset_name = quantity.get_dataset_name()
        variable = quantity.get_variable_name()
        iout += 1
        dimension_reduced = False
        quantity_ids = quantity.get_dataset().get_id_attribute()
        for i in range(self.number_of_runs):
            ds = self._compute_variable_for_one_run(i, variable, dataset_name,
                                                    self.get_calibration_year(), quantity)
            if isinstance(ds, InteractionDataset):
                ds = ds.get_flatten_dataset()
            if i == 0: # first run
                self.mu[iout] = zeros((self.y[iout].size, self.number_of_runs), dtype=float32)
                ids = ds.get_id_attribute()
            else:
                if ds.size() > ids.shape[0]:
                    # restrict later runs to the ids seen in the first run
                    ds = DatasetSubset(ds, ds.get_id_index(ids))
                    dimension_reduced = True
            scale = self.get_scales(ds, i + 1, variable)
            matching_index = ds.get_id_index(quantity_ids)
            values = scale[matching_index] * ds.get_attribute(variable)[matching_index]
            self.mu[iout][:, i] = try_transformation(values, quantity.get_transformation())
        self.values_from_mr[variable.get_expression()] = self.mu[iout]
        if dimension_reduced:
            self.y[iout] = self.y[iout][quantity.get_dataset().get_id_index(ids)]
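# Illustrative sketch (hypothetical data, plain numpy) of the id-matching and scaling
# step above: values from one simulation run are aligned to the ids of the observed
# quantity before being scaled and written into one column of mu. searchsorted stands
# in for the Opus get_id_index machinery and assumes sorted ids.
import numpy

run_ids = numpy.array([10, 20, 30, 40])              # ids of the dataset in one run
run_values = numpy.array([5.0, 7.0, 1.0, 3.0])       # attribute values in that run
scale = numpy.array([1.0, 0.5, 1.0, 2.0])            # per-record scaling factors
quantity_ids = numpy.array([20, 40])                 # ids of the observed quantity
matching_index = numpy.searchsorted(run_ids, quantity_ids)
column = scale[matching_index] * run_values[matching_index]   # -> array([3.5, 6.0])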
def _get_m_from_values(self, values, ids):
    self.m = values
    self.m_ids = ids
    if self.m.ndim < 2:
        self.m = resize(self.m, (self.m.size, 1))
    self.m = try_transformation(self.m, self.transformation_pair_for_prediction[0])
def generate_posterior_distribution(self, year, quantity_of_interest, cache_directory=None,
                                    values=None, ids=None,
                                    procedure="opus_core.bm_normal_posterior",
                                    use_bias_and_variance_from=None, transformed_back=True,
                                    transformation_pair=(None, None), **kwargs):
    if cache_directory is not None:
        self.cache_set = array([cache_directory])
        #self.set_cache_attributes(cache_directory)
    else:
        if values is None or ids is None:
            raise StandardError, "Either cache_directory or values and ids must be given."
    self.set_posterior(year, quantity_of_interest, values=values, ids=ids,
                       use_bias_and_variance_from=use_bias_and_variance_from,
                       transformation_pair=transformation_pair)
    procedure_class = ModelComponentCreator().get_model_component(procedure)
    self.simulated_values = procedure_class.run(self, **kwargs)
    if transformed_back and (self.transformation_pair_for_prediction[0] is not None):
        # need to transform back
        self.simulated_values = try_transformation(self.simulated_values,
                                                   self.transformation_pair_for_prediction[1])
    return self.simulated_values
def generate_posterior_distribution(self, year, quantity_of_interest,
                                    procedure="opus_core.bm_normal_posterior",
                                    use_bias_and_variance_from=None, transformed_back=True,
                                    aggregate_to=None, intermediates=[], **kwargs):
    """'quantity_of_interest' is the name of the variable for which the posterior
    distribution is to be generated. If there are multiple known outputs, the argument
    use_bias_and_variance_from determines which one the bias and variance are taken from;
    if it is None, the first known output is used.
    """
    self.set_posterior(year, quantity_of_interest, use_bias_and_variance_from)
    procedure_class = ModelComponentCreator().get_model_component(procedure)
    self.simulated_values = procedure_class.run(self, **kwargs)
    if transformed_back and (self.transformation_pair_for_prediction[0] is not None):
        # need to transform back
        self.simulated_values = try_transformation(self.simulated_values,
                                                   self.transformation_pair_for_prediction[1])
    if aggregate_to is not None:
        self.simulated_values = self.aggregate(self.simulated_values,
                                               aggregate_from=VariableName(quantity_of_interest).get_dataset_name(),
                                               aggregate_to=aggregate_to,
                                               intermediates=intermediates)
    return self.simulated_values
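# Illustrative sketch (hypothetical data, plain numpy) of what the aggregate_to
# argument above accomplishes through the Opus aggregation machinery: simulated
# values on a finer geography are rolled up to a coarser one (summation is assumed
# here).
import numpy

zone_values = numpy.array([10.0, 5.0, 7.0, 3.0])     # simulated values per zone
zone_to_district = numpy.array([0, 0, 1, 1])         # district index of each zone
district_values = numpy.bincount(zone_to_district, weights=zone_values)  # -> array([15., 10.])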
def get_data_for_quantity(self, transformed_back=True):
    transformation, inverse_transformation = self.observed_data.get_quantity_object_by_index(
        self.use_bias_and_variance_index).get_transformation_pair()
    if transformed_back and (transformation is not None):
        return try_transformation(self.y[self.use_bias_and_variance_index], inverse_transformation)
    return self.y[self.use_bias_and_variance_index]
def compute_m(self, year, quantity_of_interest):
    variable_name = VariableName(quantity_of_interest)
    dataset_name = variable_name.get_dataset_name()
    for i in range(self.number_of_runs):
        ds = self._compute_variable_for_one_run(i, variable_name, dataset_name, year,
                                                self.observed_data.get_quantity_object(quantity_of_interest))
        if i == 0: # first run
            self.m = zeros((ds.size(), self.number_of_runs), dtype=float32)
            self.m_ids = ds.get_id_attribute()
        self.m[:, i] = try_transformation(ds.get_attribute(variable_name),
                                          self.transformation_pair_for_prediction[0])
def get_exact_quantile(self, alpha, transformed_back=True, **kwargs):
    vars = self.get_posterior_component_variance()
    means = self.get_posterior_component_mean()
    weights = self.get_weights()
    sig = sqrt(vars)
    res = zeros(means.size)
    for i in range(means.size):
        res[i] = bmaquant(alpha, weights, means[i], sig, **kwargs)
    if transformed_back and (self.transformation_pair_for_prediction[0] is not None):
        res = try_transformation(res, self.transformation_pair_for_prediction[1])
    return res
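# bmaquant above is assumed (not verified here) to return the alpha-quantile of a
# mixture of normal components with the given weights; a standalone way to compute
# such a quantile is to solve sum_i w_i * Phi((q - mu_i) / sigma_i) = alpha for q by
# root-finding. All numbers below are illustrative.
import numpy
from scipy.stats import norm
from scipy.optimize import brentq

weights = numpy.array([0.2, 0.5, 0.3])
mus = numpy.array([1.0, 2.0, 4.0])
sigmas = numpy.array([0.5, 0.7, 1.0])
alpha = 0.975

def mixture_cdf_minus_alpha(q):
    return (weights * norm.cdf((q - mus) / sigmas)).sum() - alpha

quantile = brentq(mixture_cdf_minus_alpha,
                  mus.min() - 10 * sigmas.max(), mus.max() + 10 * sigmas.max())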
def compute_m(self, year, quantity_of_interest, values=None, ids=None):
    if (values is not None) and (ids is not None):
        self._get_m_from_values(values, ids)
        return
    variable_name = VariableName(quantity_of_interest)
    dataset_name = variable_name.get_dataset_name()
    for i in range(self.cache_set.size):
        ds = self._compute_variable_for_one_run(i, variable_name, dataset_name, year)
        if i == 0: # first run
            m = zeros((ds.size(), self.cache_set.size), dtype=float32)
            self.m_ids = ds.get_id_attribute()
        m[:, i] = try_transformation(ds.get_attribute(variable_name),
                                     self.transformation_pair_for_prediction[0])
    # average over runs into a single column
    self.m = resize(average(m, axis=1), (m.shape[0], 1))
def generate_posterior_distribution(self, year, quantity_of_interest, cache_directory=None,
                                    values=None, ids=None,
                                    procedure="opus_core.bm_normal_posterior",
                                    use_bias_and_variance_from=None, transformed_back=True,
                                    aggregate_to=None, intermediates=[], propagation_factor=1,
                                    no_propagation=True, additive_propagation=False,
                                    omit_bias=False, **kwargs):
    if (values is None or ids is None) and (self.cache_set is None):
        raise StandardError, "values and ids must be given if the BM object is initialized without cache_file_location."
    self.set_posterior(year, quantity_of_interest, values=values, ids=ids,
                       use_bias_and_variance_from=use_bias_and_variance_from,
                       propagation_factor=propagation_factor, no_propagation=no_propagation,
                       additive_propagation=additive_propagation, omit_bias=omit_bias)
    procedure_class = ModelComponentCreator().get_model_component(procedure)
    self.simulated_values = procedure_class.run(self, **kwargs)
    if transformed_back and (self.transformation_pair_for_prediction[0] is not None):
        # need to transform back
        self.simulated_values = try_transformation(self.simulated_values,
                                                   self.transformation_pair_for_prediction[1])
    self.simulated_values_ids = self.m_ids
    if aggregate_to is not None:
        (self.simulated_values, self.simulated_values_ids) = self.aggregate(
            self.simulated_values,
            aggregate_from=VariableName(quantity_of_interest).get_dataset_name(),
            aggregate_to=aggregate_to,
            intermediates=intermediates)
    return self.simulated_values
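# Minimal sketch of the "transform back" step the methods above apply to the
# simulated values. It assumes the forward transformation of the prediction was a
# square root, so the back-transformation is squaring; the actual pair is whatever
# transformation_pair_for_prediction holds. Numbers are illustrative.
import numpy

simulated_on_sqrt_scale = numpy.array([[1.2, 2.5], [0.8, 3.1]])
simulated = simulated_on_sqrt_scale ** 2               # back from a sqrt transformation
# for a log-transformed prediction the back-transformation would be numpy.exp instead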
def get_predicted_values(self, transformed_back=False):
    if transformed_back and (self.transformation_pair_for_prediction[0] is not None):
        return try_transformation(self.m, self.transformation_pair_for_prediction[1])
    return self.m
def get_transformed_values(self):
    return try_transformation(self.get_values(), self.transformation)
def get_observed_data_by_index(self, index, transformed_back=True):
    transformation, inverse_transformation = self.observed_data.get_quantity_object_by_index(
        index).get_transformation_pair()
    if transformed_back and (transformation is not None):
        return try_transformation(self.y[index], inverse_transformation)
    return self.y[index]