def calculate_for_contour_events(self): r""" Calculate the numerical error for each contour event. :rtype: list :returns: ``er_list``, a list of the error estimates for each contour event. """ # Calculate volumes if necessary if self.disc._input_sample_set._volumes is None: if self.disc._emulated_input_sample_set is not None: logging.warning("Using emulated points to estimate volumes.") self.disc._input_sample_set.estimate_volume_emulated(self. disc._emulated_input_sample_set) else: logging.warning("Making MC assumption to estimate volumes.") self.disc._input_sample_set.estimate_volume_mc() # Localize if necessary if self.disc._input_sample_set._volumes_local is None: self.disc._input_sample_set.global_to_local() # Loop over contour events and add contributions er_list = [] ops_num = self.disc._output_probability_set.check_num() for i in range(ops_num): if self.disc._output_probability_set._probabilities[i] > 0.0: # JiA, Ji, Jie, and JiAe are defined ast in # `Butler et al. 2015. <http://arxiv.org/pdf/1407.3851>` ind1 = np.equal(self.disc._io_ptr_local, i) ind2 = np.equal(self.disc_new._io_ptr_local, i) JiA = np.sum(self.disc._input_sample_set._volumes_local[ind1]) Ji = JiA JiAe = np.sum(self.disc._input_sample_set._volumes_local[ np.logical_and(ind1, ind2)]) Jie = np.sum(self.disc._input_sample_set._volumes_local[ind2]) JiA = comm.allreduce(JiA, op=MPI.SUM) Ji = comm.allreduce(Ji, op=MPI.SUM) JiAe = comm.allreduce(JiAe, op=MPI.SUM) Jie = comm.allreduce(Jie, op=MPI.SUM) er_list.append(self.disc._output_probability_set. _probabilities[i] * ((JiA*Jie - JiAe*Ji)/(Ji*Jie))) else: er_list.append(0.0) return er_list
def Test_model_error(self):
    """
    Testing :meth:`bet.calculateP.calculateError.model_error`
    """
    # Sanity-check the discretization sizes before computing errors.
    sample_count = self.disc.check_nums()
    err_calc = calculateError.model_error(self.disc)
    contour_errors = err_calc.calculate_for_contour_events()

    # Build a region set marking the cells pointed at output bin 0.
    region_set = self.disc._input_sample_set.copy()
    region_set.set_region_local(np.equal(self.disc._io_ptr_local, 0))
    region_set.local_to_global()

    # Region-based estimate must match the contour-event estimate.
    region_error = err_calc.calculate_for_sample_set_region(region_set, 1)
    self.assertAlmostEqual(contour_errors[0], region_error)

    # Per-sample error ids must reduce (across ranks) to the same value.
    local_id_total = np.sum(self.disc._input_sample_set._error_id_local)
    global_id_total = comm.allreduce(local_id_total, op=MPI.SUM)
    self.assertAlmostEqual(region_error, global_id_total)

    # Supplying an explicit emulation set should not change the answer.
    emu = self.disc._input_sample_set
    emu_error = err_calc.calculate_for_sample_set_region(
        region_set, 1, emulated_set=emu)
    self.assertAlmostEqual(contour_errors[0], emu_error)

    # Nor should attaching the emulated set to the discretization itself.
    self.disc.set_emulated_input_sample_set(self.disc._input_sample_set)
    err_calc = calculateError.model_error(self.disc)
    attached_error = err_calc.calculate_for_sample_set_region(region_set, 1)
    self.assertAlmostEqual(contour_errors[0], attached_error)
def Test_model_error(self):
    """
    Testing :meth:`bet.calculateP.calculateError.model_error`
    """
    _ = self.disc.check_nums()
    error_obj = calculateError.model_error(self.disc)
    contour_est = error_obj.calculate_for_contour_events()

    # Mark the input cells whose output lands in bin 0 as region ``1``.
    marked_set = self.disc._input_sample_set.copy()
    in_bin_zero = np.equal(self.disc._io_ptr_local, 0)
    marked_set.set_region_local(in_bin_zero)
    marked_set.local_to_global()

    # The region estimate agrees with the first contour-event estimate.
    est_region = error_obj.calculate_for_sample_set_region(marked_set, 1)
    self.assertAlmostEqual(contour_est[0], est_region)

    # ...and with the globally-reduced per-sample error ids.
    ids_local = np.sum(self.disc._input_sample_set._error_id_local)
    ids_global = comm.allreduce(ids_local, op=MPI.SUM)
    self.assertAlmostEqual(est_region, ids_global)

    # Passing the input set as the emulation set gives the same answer.
    est_emulated = error_obj.calculate_for_sample_set_region(
        marked_set, 1, emulated_set=self.disc._input_sample_set)
    self.assertAlmostEqual(contour_est[0], est_emulated)

    # As does attaching the emulated set to the discretization itself.
    self.disc.set_emulated_input_sample_set(self.disc._input_sample_set)
    error_obj = calculateError.model_error(self.disc)
    est_attached = error_obj.calculate_for_sample_set_region(marked_set, 1)
    self.assertAlmostEqual(contour_est[0], est_attached)
def prob(samples, data, rho_D_M, d_distr_samples, d_Tree=None):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples}})`, the
    probability associated with a set of voronoi cells defined by the model
    solves at :math:`(\lambda_{samples})` where the volumes of these voronoi
    cells are assumed to be equal under the MC assumption.

    :param samples: The samples in parameter space for which the model was
        run.
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim)
    :param data: The data from running the model given the samples.
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)
    :param rho_D_M: The simple function approximation of rho_D
    :type rho_D_M: :class:`~numpy.ndarray` of shape (M,)
    :param d_distr_samples: The samples in the data space that define a
        partition of D for the simple function approximation
    :type d_distr_samples: :class:`~numpy.ndarray` of shape (M, mdim)
    :param d_Tree: :class:`~scipy.spatial.KDTree` for d_distr_samples
    :rtype: tuple of :class:`~numpy.ndarray`
    :returns: (P, lam_vol, io_ptr) where P is the probability associated
        with samples (shape (num_samples,)), lam_vol the volumes associated
        with the samples (shape (num_samples,)), and io_ptr a pointer from
        this rank's local data to the M bins.
    """
    # Promote 1-D inputs to 2-D column arrays.
    if len(samples.shape) == 1:
        samples = np.expand_dims(samples, axis=1)
    if len(data.shape) == 1:
        data = np.expand_dims(data, axis=1)
    if len(d_distr_samples.shape) == 1:
        d_distr_samples = np.expand_dims(d_distr_samples, axis=1)
    # Build the KD-tree lazily when the caller did not supply one.
    if d_Tree is None:
        d_Tree = spatial.KDTree(d_distr_samples)

    # Set up local arrays for parallelism (round-robin over MPI ranks).
    local_index = np.arange(comm.rank, samples.shape[0], comm.size)
    samples_local = samples[local_index, :]
    data_local = data[local_index, :]
    local_array = np.array(local_index, dtype='int64')

    # Determine which inputs go to which M bins using the QoI.
    (_, io_ptr) = d_Tree.query(data_local)

    # Apply the standard MC approximation and calculate probabilities.
    P_local = np.zeros((samples_local.shape[0],))
    for i in range(rho_D_M.shape[0]):
        Itemp = np.equal(io_ptr, i)
        Itemp_sum = np.sum(Itemp)
        Itemp_sum = comm.allreduce(Itemp_sum, op=MPI.SUM)
        if Itemp_sum > 0:
            # Split bin i's probability evenly over the cells inside it.
            P_local[Itemp] = rho_D_M[i]/Itemp_sum
    # Gather the local pieces and restore the global sample ordering.
    P_global = util.get_global_values(P_local)
    global_index = util.get_global_values(local_array)
    P = np.zeros(P_global.shape)
    P[global_index] = P_global[:]
    # Equal volumes under the MC assumption.
    lam_vol = (1.0/float(samples.shape[0]))*np.ones((samples.shape[0],))
    return (P, lam_vol, io_ptr)
def prob_emulated(samples, data, rho_D_M, d_distr_samples,
                  lambda_emulate=None, d_Tree=None):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{emulate}})`, the
    probability associated with a set of voronoi cells defined by
    ``num_l_emulate`` iid samples :math:`(\lambda_{emulate})`.

    :param samples: The samples in parameter space for which the model was
        run.
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim)
    :param data: The data from running the model given the samples.
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)
    :param rho_D_M: The simple function approximation of rho_D
    :type rho_D_M: :class:`~numpy.ndarray` of shape (M,)
    :param d_distr_samples: The samples in the data space that define a
        partition of D for the simple function approximation
    :type d_distr_samples: :class:`~numpy.ndarray` of shape (M, mdim)
    :param d_Tree: :class:`~scipy.spatial.KDTree` for d_distr_samples
    :param lambda_emulate: Samples used to partition the parameter space
    :type lambda_emulate: :class:`~numpy.ndarray` of shape (num_l_emulate,
        ndim)
    :rtype: tuple
    :returns: (P, lambda_emulate, io_ptr, emulate_ptr)
    """
    # Promote 1-D inputs to 2-D column arrays.
    if len(samples.shape) == 1:
        samples = np.expand_dims(samples, axis=1)
    if len(data.shape) == 1:
        data = np.expand_dims(data, axis=1)
    # Without emulated points, fall back to the model-run samples.
    if lambda_emulate is None:
        lambda_emulate = samples
    if len(d_distr_samples.shape) == 1:
        d_distr_samples = np.expand_dims(d_distr_samples, axis=1)
    if d_Tree is None:
        d_Tree = spatial.KDTree(d_distr_samples)

    # Determine which inputs go to which M bins using the QoI.
    (_, io_ptr) = d_Tree.query(data)

    # Determine which emulated samples match with which model run samples.
    l_Tree = spatial.KDTree(samples)
    (_, emulate_ptr) = l_Tree.query(lambda_emulate)

    # Calculate probabilities: compose the two pointers so each emulated
    # point maps directly to its output bin.
    P = np.zeros((lambda_emulate.shape[0],))
    d_distr_emu_ptr = io_ptr[emulate_ptr]
    for i in range(rho_D_M.shape[0]):
        Itemp = np.equal(d_distr_emu_ptr, i)
        Itemp_sum = np.sum(Itemp)
        Itemp_sum = comm.allreduce(Itemp_sum, op=MPI.SUM)
        if Itemp_sum > 0:
            # Split bin i's probability evenly over its emulated points.
            P[Itemp] = rho_D_M[i]/Itemp_sum
    return (P, lambda_emulate, io_ptr, emulate_ptr)
def prob_from_discretization_input(disc, set_new):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples_new}})` from
    :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples_old}})` where
    :math:`\lambda_{samples_old}` come from an input discretization.

    :param disc: Discretization on which probabilities have already been
        calculated
    :type disc: :class:`~bet.sample.discretization`
    :param set_new: Sample set for which probabilities will be calculated.
    :type set_new: :class:`~bet.sample.sample_set_base`
    """
    # Prefer the emulated points; fall back to the input samples (MC).
    source = disc._emulated_input_sample_set
    if source is None:
        logging.warning("Using MC assumption because no emulated points given")
        source = disc._input_sample_set
    if source._values_local is None:
        source.global_to_local()
    if source._probabilities_local is None:
        raise AttributeError("Probabilities must be pre-calculated.")

    # Size and dimension checks.
    disc.check_nums()
    num_new = set_new.check_num()
    if disc._input_sample_set._dim != set_new._dim:
        raise samp.dim_not_matching("Dimensions of sets are not equal.")

    # Map each local source point to its nearest new cell.
    (_, cell_ptr) = set_new.query(source._values_local)
    cell_ptr = cell_ptr.flat[:]

    # Accumulate source probability into each new cell, reducing over ranks.
    new_probs = np.zeros((num_new,))
    local_probs = source._probabilities_local
    for cell in range(num_new):
        owned = np.equal(cell_ptr, cell)
        cell_total = np.sum(local_probs[owned])
        new_probs[cell] = comm.allreduce(cell_total, op=MPI.SUM)

    set_new.set_probabilities(new_probs)
    return new_probs
def prob_from_discretization_input(disc, set_new):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples_new}})` from
    :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples_old}})` where
    :math:`\lambda_{samples_old}` come from an input discretization.

    :param disc: Discretization on which probabilities have already been
        calculated
    :type disc: :class:`~bet.sample.discretization`
    :param set_new: Sample set for which probabilities will be calculated.
    :type set_new: :class:`~bet.sample.sample_set_base`
    """
    # Choose the source of probability mass: emulated points if present.
    if disc._emulated_input_sample_set is not None:
        emulation = disc._emulated_input_sample_set
    else:
        logging.warning("Using MC assumption because no emulated points given")
        emulation = disc._input_sample_set
    if emulation._values_local is None:
        emulation.global_to_local()
    if emulation._probabilities_local is None:
        raise AttributeError("Probabilities must be pre-calculated.")

    # Dimension/size checks.
    disc.check_nums()
    new_count = set_new.check_num()
    if disc._input_sample_set._dim != set_new._dim:
        raise samp.dim_not_matching("Dimensions of sets are not equal.")

    # Nearest new cell for every local emulation point.
    (_, assignment) = set_new.query(emulation._values_local)
    assignment = assignment.flat[:]

    # Transfer probability mass cell by cell, summing across MPI ranks.
    weights = emulation._probabilities_local
    updated = np.zeros((new_count,))
    for idx in range(new_count):
        picked = np.equal(assignment, idx)
        updated[idx] = comm.allreduce(np.sum(weights[picked]), op=MPI.SUM)

    set_new.set_probabilities(updated)
    return updated
def prob_on_emulated_samples(discretization, globalize=True):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{emulate}})`, the
    probability associated with a set of voronoi cells defined by
    ``num_l_emulate`` iid samples :math:`(\lambda_{emulate})`.
    This is added to the emulated input sample set object.

    :param discretization: An object containing the discretization
        information.
    :type discretization: class:`bet.sample.discretization`
    :param bool globalize: Makes local variables global.
    """
    # Check dimensions
    discretization.check_nums()
    op_num = discretization._output_probability_set.check_num()
    discretization._emulated_input_sample_set.check_num()

    # Check for necessary properties
    if discretization._io_ptr_local is None:
        discretization.set_io_ptr(globalize=True)
    if discretization._emulated_ii_ptr_local is None:
        discretization.set_emulated_ii_ptr(globalize=False)

    # Calculate probabilities: compose the pointers so each local emulated
    # point maps directly to its output bin.
    P = np.zeros(
        (discretization._emulated_input_sample_set._values_local.shape[0],))
    d_distr_emu_ptr = discretization._io_ptr[
        discretization._emulated_ii_ptr_local]
    for i in range(op_num):
        if discretization._output_probability_set._probabilities[i] > 0.0:
            Itemp = np.equal(d_distr_emu_ptr, i)
            Itemp_sum = np.sum(Itemp)
            # Global count of emulated points in bin i across ranks.
            Itemp_sum = comm.allreduce(Itemp_sum, op=MPI.SUM)
            if Itemp_sum > 0:
                # Split the bin probability evenly over its emulated points.
                P[Itemp] = discretization._output_probability_set.\
                    _probabilities[i] / Itemp_sum
    discretization._emulated_input_sample_set._probabilities_local = P
    if globalize:
        discretization._emulated_input_sample_set.local_to_global()
def prob_on_emulated_samples(discretization, globalize=True):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{emulate}})`, the
    probability associated with a set of voronoi cells defined by
    ``num_l_emulate`` iid samples :math:`(\lambda_{emulate})`.
    This is added to the emulated input sample set object.

    :param discretization: An object containing the discretization
        information.
    :type discretization: class:`bet.sample.discretization`
    :param bool globalize: Makes local variables global.
    """
    # Check dimensions
    discretization.check_nums()
    op_num = discretization._output_probability_set.check_num()
    discretization._emulated_input_sample_set.check_num()

    # Check for necessary properties
    if discretization._io_ptr_local is None:
        discretization.set_io_ptr(globalize=True)
    if discretization._emulated_ii_ptr_local is None:
        discretization.set_emulated_ii_ptr(globalize=False)

    # Calculate probabilities: compose the pointers so each local emulated
    # point maps directly to its output bin.
    P = np.zeros((discretization._emulated_input_sample_set.
                  _values_local.shape[0],))
    d_distr_emu_ptr = discretization._io_ptr[discretization.
                                             _emulated_ii_ptr_local]
    for i in range(op_num):
        if discretization._output_probability_set._probabilities[i] > 0.0:
            Itemp = np.equal(d_distr_emu_ptr, i)
            Itemp_sum = np.sum(Itemp)
            # Global count of emulated points in bin i across ranks.
            Itemp_sum = comm.allreduce(Itemp_sum, op=MPI.SUM)
            if Itemp_sum > 0:
                # Split the bin probability evenly over its emulated points.
                P[Itemp] = discretization._output_probability_set.\
                    _probabilities[i]/Itemp_sum
    discretization._emulated_input_sample_set._probabilities_local = P
    if globalize:
        discretization._emulated_input_sample_set.local_to_global()
def prob_from_sample_set(set_old, set_new):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples_new}})` from
    :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples_old}})` using the MC
    assumption with respect to set_old.

    :param set_old: Sample set on which probabilities have already been
        calculated
    :type set_old: :class:`~bet.sample.sample_set_base`
    :param set_new: Sample set for which probabilities will be calculated.
    :type set_new: :class:`~bet.sample.sample_set_base`
    """
    # Size and dimension checks.
    set_old.check_num()
    num_new = set_new.check_num()
    if set_old._dim != set_new._dim:
        raise samp.dim_not_matching("Dimensions of sets are not equal.")

    # Assign every local old point to its nearest new cell.
    if set_old._values_local is None:
        set_old.global_to_local()
    (_, nearest) = set_new.query(set_old._values_local)
    nearest = nearest.flat[:]

    # Move probability mass into the new cells, reducing over MPI ranks.
    transferred = np.zeros((num_new,))
    for cell in range(num_new):
        mask = np.equal(nearest, cell)
        local_mass = np.sum(set_old._probabilities_local[mask])
        transferred[cell] = comm.allreduce(local_mass, op=MPI.SUM)

    set_new.set_probabilities(transferred)
    return transferred
def prob_from_sample_set(set_old, set_new):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples_new}})` from
    :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples_old}})` using the MC
    assumption with respect to set_old.

    :param set_old: Sample set on which probabilities have already been
        calculated
    :type set_old: :class:`~bet.sample.sample_set_base`
    :param set_new: Sample set for which probabilities will be calculated.
    :type set_new: :class:`~bet.sample.sample_set_base`
    """
    # Size and dimension checks.
    set_old.check_num()
    num_new = set_new.check_num()
    if set_old._dim != set_new._dim:
        raise samp.dim_not_matching("Dimensions of sets are not equal.")

    # Assign every local old point to its nearest new cell.
    if set_old._values_local is None:
        set_old.global_to_local()
    (_, owner) = set_new.query(set_old._values_local)
    owner = owner.flat[:]

    # Sum the old probabilities landing in each new cell across all ranks.
    old_probs = set_old._probabilities_local
    prob_new = np.array([
        comm.allreduce(np.sum(old_probs[np.equal(owner, cell)]), op=MPI.SUM)
        for cell in range(num_new)])

    set_new.set_probabilities(prob_new)
    return prob_new
def prob(discretization, globalize=True):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples}})`, the
    probability associated with a set of cells defined by the model solves
    at :math:`(\lambda_{samples})` where the volumes of these cells are
    provided.

    :param discretization: An object containing the discretization
        information.
    :type discretization: class:`bet.sample.discretization`
    :param bool globalize: Makes local variables global.
    """
    # Verify consistent sizes before computing anything.
    discretization.check_nums()
    bin_count = discretization._output_probability_set.check_num()
    # Ensure the io pointer and local input values exist.
    if discretization._io_ptr_local is None:
        discretization.set_io_ptr(globalize=False)
    if discretization._input_sample_set._values_local is None:
        discretization._input_sample_set.global_to_local()

    out_probs = discretization._output_probability_set._probabilities
    volumes = discretization._input_sample_set._volumes_local
    io_ptr = discretization._io_ptr_local
    local_P = np.zeros((len(io_ptr),))
    for bin_idx in range(bin_count):
        if out_probs[bin_idx] > 0.0:
            members = np.equal(io_ptr, bin_idx)
            bin_volume = np.sum(volumes[members])
            bin_volume = comm.allreduce(bin_volume, op=MPI.SUM)
            if bin_volume > 0:
                # Each cell gets the bin probability weighted by volume.
                local_P[members] = out_probs[bin_idx] * volumes[members] \
                    / bin_volume
    if globalize:
        discretization._input_sample_set._probabilities = \
            util.get_global_values(local_P)
    discretization._input_sample_set._probabilities_local = local_P
def prob(discretization, globalize=True):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples}})`, the
    probability associated with a set of cells defined by the model solves
    at :math:`(\lambda_{samples})` where the volumes of these cells are
    provided.

    :param discretization: An object containing the discretization
        information.
    :type discretization: class:`bet.sample.discretization`
    :param bool globalize: Makes local variables global.
    """
    # Size checks.
    discretization.check_nums()
    num_bins = discretization._output_probability_set.check_num()
    # Required attributes: local io pointer and local input values.
    if discretization._io_ptr_local is None:
        discretization.set_io_ptr(globalize=False)
    if discretization._input_sample_set._values_local is None:
        discretization._input_sample_set.global_to_local()

    P_local = np.zeros((len(discretization._io_ptr_local),))
    for bin_id in range(num_bins):
        bin_prob = \
            discretization._output_probability_set._probabilities[bin_id]
        if bin_prob <= 0.0:
            continue  # nothing to distribute for empty bins
        in_bin = np.equal(discretization._io_ptr_local, bin_id)
        vol_sum = np.sum(
            discretization._input_sample_set._volumes_local[in_bin])
        vol_sum = comm.allreduce(vol_sum, op=MPI.SUM)
        if vol_sum > 0:
            # Distribute the bin probability proportionally to cell volume.
            P_local[in_bin] = bin_prob * discretization.\
                _input_sample_set._volumes_local[in_bin] / vol_sum
    if globalize:
        discretization._input_sample_set._probabilities = \
            util.get_global_values(P_local)
    discretization._input_sample_set._probabilities_local = P_local
def calculate_for_sample_set_region(self, s_set, region, emulated_set=None):
    r"""
    Calculate the sampling error bounds for a region of the input space
    defined by a sample set object which defines an event :math:`A`.

    :param s_set: sample set for which to calculate error
    :type s_set: :class:`bet.sample.sample_set_base`
    :param int region: region of s_set for which to calculate error
    :param emulated_set: sample set for volume emulation
    :type emulated_set: :class:`bet.sample.sample_set_base`
    :rtype: tuple
    :returns: (``upper_bound``, ``lower_bound``) the upper and lower bounds
        for the error; ``(nan, nan)`` if any needed event contains no
        emulated points.
    """
    # Set up marker for the event A from the requested region.
    self.disc._input_sample_set.local_to_global()
    if s_set._region is None:
        msg = "regions must be defined for the sample set."
        raise wrong_argument_type(msg)
    marker = np.equal(s_set._region, region)
    if not np.any(marker):
        msg = "The given region does not exist."
        raise wrong_argument_type(msg)

    # Set up discretizations: pick the source of emulated points.
    if emulated_set is not None:
        disc = self.disc.copy()
        disc.set_emulated_input_sample_set(emulated_set)
        disc.set_emulated_ii_ptr(globalize=False)
        disc_new = samp.discretization(
            input_sample_set=s_set,
            output_sample_set=s_set,
            emulated_input_sample_set=emulated_set)
        disc_new.set_emulated_ii_ptr(globalize=False)
    elif self.disc._emulated_input_sample_set is not None:
        msg = "Using emulated_input_sample_set for volume emulation"
        logging.warning(msg)
        disc = self.disc
        # NOTE(review): this checks the *global* pointer while the other
        # branches set the local one — confirm intended.
        if disc._emulated_ii_ptr is None:
            disc.set_emulated_ii_ptr(globalize=False)
        disc_new = samp.discretization(
            input_sample_set=s_set,
            output_sample_set=s_set,
            emulated_input_sample_set=self.disc._emulated_input_sample_set)
        disc_new.set_emulated_ii_ptr(globalize=False)
    else:
        logging.warning("Using MC assumption for calculating volumes.")
        disc = self.disc.copy()
        disc.set_emulated_input_sample_set(disc._input_sample_set)
        disc.set_emulated_ii_ptr(globalize=False)
        disc_new = samp.discretization(
            input_sample_set=s_set,
            output_sample_set=s_set,
            emulated_input_sample_set=self.disc._input_sample_set)
        disc_new.set_emulated_ii_ptr(globalize=False)

    # Emulated points in the region (event A).
    in_A = marker[disc_new._emulated_ii_ptr_local]
    upper_bound = 0.0
    lower_bound = 0.0

    # Loop over contour intervals and add error contributions.
    ops_num = self.disc._output_probability_set.check_num()
    for i in range(ops_num):
        # Contribution from contour event :math:`A_{i,N}`
        if self.disc._output_probability_set._probabilities[i] > 0.0:
            indices = np.equal(disc._io_ptr, i)
            in_Ai = indices[disc._emulated_ii_ptr_local]
            # sum1 :math:`\mu_{\Lambda}(A \cap A_{i,N})`
            sum1 = np.sum(np.logical_and(in_A, in_Ai))
            # sum2 :math:`\mu_{\Lambda}(A_{i,N})`
            sum2 = np.sum(in_Ai)
            sum1 = comm.allreduce(sum1, op=MPI.SUM)
            sum2 = comm.allreduce(sum2, op=MPI.SUM)
            if sum2 == 0.0:
                return (float('nan'), float('nan'))
            E = float(sum1)/float(sum2)

            # Build the B_N and C_N neighborhoods of the contour event.
            # (np.bool was removed in NumPy 1.24; the builtin bool is the
            # correct dtype here.)
            in_B_N = np.zeros(in_A.shape, dtype=bool)
            for j in self.B_N[i]:
                in_B_N = np.logical_or(
                    np.equal(disc._emulated_ii_ptr_local, j), in_B_N)
            in_C_N = np.zeros(in_A.shape, dtype=bool)
            for j in self.C_N[i]:
                in_C_N = np.logical_or(
                    np.equal(disc._emulated_ii_ptr_local, j), in_C_N)

            # sum3 :math:`\mu_{\Lambda}(A \cap B_N)`
            sum3 = np.sum(np.logical_and(in_A, in_B_N))
            # sum4 :math:`\mu_{\Lambda}(C_N)`
            sum4 = np.sum(in_C_N)
            sum3 = comm.allreduce(sum3, op=MPI.SUM)
            sum4 = comm.allreduce(sum4, op=MPI.SUM)
            if sum4 == 0.0:
                return (float('nan'), float('nan'))
            term1 = float(sum3)/float(sum4) - E

            # sum5 :math:`\mu_{\Lambda}(A \cap C_N)`
            sum5 = np.sum(np.logical_and(in_A, in_C_N))
            # sum6 :math:`\mu_{\Lambda}(B_N)`
            sum6 = np.sum(in_B_N)
            sum5 = comm.allreduce(sum5, op=MPI.SUM)
            sum6 = comm.allreduce(sum6, op=MPI.SUM)
            if sum6 == 0.0:
                return (float('nan'), float('nan'))
            term2 = float(sum5)/float(sum6) - E

            upper_bound += self.disc._output_probability_set.\
                _probabilities[i]*max(term1, term2)
            lower_bound += self.disc._output_probability_set.\
                _probabilities[i]*min(term1, term2)
    return (upper_bound, lower_bound)
def prob_from_sample_set_with_emulated_volumes(set_old, set_new,
                                               set_emulate=None):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples_new}})` from
    :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples_old}})` using a set of
    emulated points that are distributed with respect to the volume measure.

    :param set_old: Sample set on which probabilities have already been
        calculated
    :type set_old: :class:`~bet.sample.sample_set_base`
    :param set_new: Sample set for which probabilities will be calculated.
    :type set_new: :class:`~bet.sample.sample_set_base`
    :param set_emulate: Sample set for volume emulation
    :type set_emulate: :class:`~bet.sample.sample_set_base`
    """
    # No emulation set: defer to the plain MC transfer.
    if set_emulate is None:
        logging.warning("Using MC assumption because no emulated points given")
        return prob_from_sample_set(set_old, set_new)

    # Size and dimension checks.
    num_old = set_old.check_num()
    num_new = set_new.check_num()
    set_emulate.check_num()
    if (set_old._dim != set_new._dim) or (set_old._dim != set_emulate._dim):
        raise samp.dim_not_matching("Dimensions of sets are not equal.")

    # Localize the emulated points for per-rank work.
    if set_emulate._values_local is None:
        set_emulate.global_to_local()

    # Map every emulated point to its nearest old and new cells.
    (_, old_ptr) = set_old.query(set_emulate._values_local)
    (_, new_ptr) = set_new.query(set_emulate._values_local)
    old_ptr = old_ptr.flat[:]
    new_ptr = new_ptr.flat[:]

    prob_new = np.zeros((num_new,))
    emu_mass = np.zeros((len(old_ptr),))

    # Spread each old cell's probability evenly over its emulated points.
    empty_cells = False
    for old_cell in range(num_old):
        if set_old._probabilities[old_cell] > 0.0:
            members = np.equal(old_ptr, old_cell)
            count = np.sum(members)
            count = comm.allreduce(count, op=MPI.SUM)
            if count > 0:
                emu_mass[members] += \
                    set_old._probabilities[old_cell] / float(count)
            else:
                empty_cells = True

    # Renormalize when probability was lost to point-free old cells.
    if empty_cells:
        msg = "Some old cells have no emulated points in them. "
        msg += "Renormalizing probability."
        logging.warning(msg)
        mass_total = np.sum(emu_mass)
        mass_total = comm.allreduce(mass_total, op=MPI.SUM)
        emu_mass = emu_mass / mass_total

    # Collect the emulated mass into the new cells across all ranks.
    for new_cell in range(num_new):
        members = np.equal(new_ptr, new_cell)
        mass = np.sum(emu_mass[members])
        mass = comm.allreduce(mass, op=MPI.SUM)
        prob_new[new_cell] = mass

    set_new.set_probabilities(prob_new)
    return prob_new
def prob_mc(samples, data, rho_D_M, d_distr_samples, lambda_emulate=None,
            d_Tree=None):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples}})`, the
    probability associated with a set of voronoi cells defined by the model
    solves at :math:`(\lambda_{samples})` where the volumes of these voronoi
    cells are approximated using MC integration.

    :param samples: The samples in parameter space for which the model was
        run.
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim)
    :param data: The data from running the model given the samples.
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)
    :param rho_D_M: The simple function approximation of rho_D
    :type rho_D_M: :class:`~numpy.ndarray` of shape (M,)
    :param d_distr_samples: The samples in the data space that define a
        partition of D for the simple function approximation
    :type d_distr_samples: :class:`~numpy.ndarray` of shape (M, mdim)
    :param d_Tree: :class:`~scipy.spatial.KDTree` for d_distr_samples
    :param lambda_emulate: Samples used to partition the parameter space
    :rtype: tuple
    :returns: (P, lam_vol, lambda_emulate, io_ptr, emulate_ptr) where P is
        the probability associated with samples, lam_vol the volumes
        associated with the samples, io_ptr a pointer from data to M bins,
        and emulate_ptr a pointer from emulated samples to samples (in
        parameter space)
    """
    # Promote 1-D inputs to 2-D column arrays.
    if len(samples.shape) == 1:
        samples = np.expand_dims(samples, axis=1)
    if len(data.shape) == 1:
        data = np.expand_dims(data, axis=1)
    # Without emulated points, fall back to the model-run samples.
    if lambda_emulate is None:
        lambda_emulate = samples
    if len(d_distr_samples.shape) == 1:
        d_distr_samples = np.expand_dims(d_distr_samples, axis=1)
    if d_Tree is None:
        d_Tree = spatial.KDTree(d_distr_samples)

    # Determine which inputs go to which M bins using the QoI.
    (_, io_ptr) = d_Tree.query(data)

    # Determine which emulated samples match with which model run samples.
    l_Tree = spatial.KDTree(samples)
    (_, emulate_ptr) = l_Tree.query(lambda_emulate)

    # Apply the standard MC approximation to determine the number of
    # emulated samples per model run sample. This is for approximating
    # \mu_Lambda(A_i \intersect b_j).  np.bincount does the counting in one
    # O(num_l_emulate) pass instead of the O(n*m) per-sample loop.
    lam_vol = np.bincount(
        emulate_ptr, minlength=samples.shape[0]).astype(np.float64)
    clam_vol = np.copy(lam_vol)
    comm.Allreduce([lam_vol, MPI.DOUBLE], [clam_vol, MPI.DOUBLE], op=MPI.SUM)
    lam_vol = clam_vol
    num_emulated = lambda_emulate.shape[0]
    num_emulated = comm.allreduce(num_emulated, op=MPI.SUM)
    lam_vol = lam_vol/(num_emulated)

    # Set up local arrays for parallelism (round-robin over MPI ranks).
    local_index = np.arange(comm.rank, samples.shape[0], comm.size)
    samples_local = samples[local_index, :]
    data_local = data[local_index, :]
    lam_vol_local = lam_vol[local_index]
    local_array = np.array(local_index, dtype='int64')

    # Determine which local inputs go to which M bins using the QoI.
    (_, io_ptr_local) = d_Tree.query(data_local)

    # Calculate probabilities.
    P_local = np.zeros((samples_local.shape[0],))
    for i in range(rho_D_M.shape[0]):
        Itemp = np.equal(io_ptr_local, i)
        Itemp_sum = np.sum(lam_vol_local[Itemp])
        Itemp_sum = comm.allreduce(Itemp_sum, op=MPI.SUM)
        if Itemp_sum > 0:
            # Distribute the bin probability proportionally to cell volume.
            P_local[Itemp] = rho_D_M[i]*lam_vol_local[Itemp]/Itemp_sum
    # Gather the local pieces and restore the global sample ordering.
    P_global = util.get_global_values(P_local)
    global_index = util.get_global_values(local_array)
    P = np.zeros(P_global.shape)
    P[global_index] = P_global[:]
    return (P, lam_vol, lambda_emulate, io_ptr, emulate_ptr)
def calculate_prob_for_sample_set_region(self, s_set, regions,
                                         update_input=True):
    """
    Solves stochastic inverse problem based on surrogate points and the
    MC assumption. Calculates the probability of regions of input space
    and error estimates for those probabilities.

    :param s_set: sample set for which to calculate error
    :type s_set: :class:`bet.sample.sample_set_base`
    :param regions: list of regions of s_set for which to calculate error
    :type regions: list
    :param update_input: whether or not to update probabilities and error
        identifiers for the input discretization
    :type update_input: bool
    :rtype: tuple
    :returns: (``probabilities``, ``error_estimates``), the probability and
        error estimates for each requested region
    """
    # The surrogate must have been built before this method is called.
    if not hasattr(self, 'surrogate_discretization'):
        msg = "surrogate discretization has not been created"
        raise calculateError.wrong_argument_type(msg)
    if not isinstance(s_set, sample.sample_set_base):
        msg = "s_set must be of type bet.sample.sample_set_base"
        raise calculateError.wrong_argument_type(msg)

    # Calculate probability of region: MC volumes on the surrogate, then
    # solve and transfer probability onto the given sample set.
    if self.surrogate_discretization._input_sample_set._volumes_local\
            is None:
        self.surrogate_discretization._input_sample_set.\
            estimate_volume_mc(globalize=False)
    calculateP.prob(self.surrogate_discretization, globalize=False)
    prob_new_values = calculateP.prob_from_sample_set(
        self.surrogate_discretization._input_sample_set, s_set)

    # Calculate probability and error estimate for each region.
    probabilities = []
    error_estimates = []
    for region in regions:
        marker = np.equal(s_set._region, region)
        probability = np.sum(prob_new_values[marker])
        # Calculate error estimate for region
        model_error = calculateError.model_error(
            self.surrogate_discretization)
        error_estimate = model_error.calculate_for_sample_set_region_mc(
            s_set, region)
        probabilities.append(probability)
        error_estimates.append(error_estimate)

    # Optionally push surrogate probabilities and error ids back onto the
    # original input discretization, reducing over MPI ranks per cell.
    if update_input:
        num = self.input_disc._input_sample_set.check_num()
        prob = np.zeros((num,))
        error_id = np.zeros((num,))
        for i in range(num):
            # Surrogate points belonging to input cell i.
            Itemp = np.equal(self.dummy_disc._emulated_ii_ptr_local, i)
            prob_sum = np.sum(self.surrogate_discretization.
                              _input_sample_set._probabilities_local[Itemp])
            prob[i] = comm.allreduce(prob_sum, op=MPI.SUM)
            error_id_sum = np.sum(self.surrogate_discretization.
                                  _input_sample_set._error_id_local[Itemp])
            error_id[i] = comm.allreduce(error_id_sum, op=MPI.SUM)
        self.input_disc._input_sample_set.set_probabilities(prob)
        self.input_disc._input_sample_set.set_error_id(error_id)
    return (probabilities, error_estimates)
def calculate_for_sample_set_region(self, s_set, region,
                                    emulated_set=None):
    """
    Calculate the numerical error estimate for a region of the input
    space defined by a sample set object, using emulated points for
    volume estimation when available and falling back to the MC
    assumption otherwise.

    :param s_set: sample set for which to calculate error
    :type s_set: :class:`bet.sample.sample_set_base`
    :param region: region of s_set for which to calculate error
    :type region: int
    :param emulated_set: sample set for volume emulation
    :type emulated_set: :class:`bet.sample.sample_set_base`
    :rtype: float
    :returns: ``er_est``, the numerical error estimate for the region
        (``np.inf`` if some contour event contains no emulated points)
    """
    # Set up marker for members of s_set belonging to the region.
    if s_set._region is None:
        msg = "regions must be defined for the sample set."
        raise wrong_argument_type(msg)
    marker = np.equal(s_set._region, region)
    if not np.any(marker):
        msg = "The given region does not exist."
        raise wrong_argument_type(msg)

    # Setup discretizations: choose the source of emulated points.
    if emulated_set is not None:
        # Caller-provided emulated set: work on a copy so self.disc
        # keeps its own emulated set (if any) untouched.
        self.disc._input_sample_set.local_to_global()
        self.disc.globalize_ptrs()
        self.disc_new.globalize_ptrs()
        disc = self.disc.copy()
        disc.set_emulated_input_sample_set(emulated_set)
        disc.set_emulated_ii_ptr(globalize=False)
        disc_new_set = samp.discretization(
            input_sample_set=s_set,
            output_sample_set=s_set,
            emulated_input_sample_set=emulated_set)
        disc_new_set.set_emulated_ii_ptr(globalize=False)
    elif self.disc._emulated_input_sample_set is not None:
        # Reuse the discretization's own emulated input set.
        self.disc._input_sample_set.local_to_global()
        msg = "Using emulated_input_sample_set for volume emulation"
        logging.warning(msg)
        self.disc.globalize_ptrs()
        self.disc_new.globalize_ptrs()
        disc = self.disc
        if disc._emulated_ii_ptr_local is None:
            disc.set_emulated_ii_ptr(globalize=False)
        self.disc_new.set_emulated_ii_ptr(globalize=False)
        disc_new_set = samp.discretization(
            input_sample_set=s_set,
            output_sample_set=s_set,
            emulated_input_sample_set=disc._emulated_input_sample_set)
        disc_new_set.set_emulated_ii_ptr(globalize=False)
    else:
        # No emulated points at all: delegate to the MC-assumption path.
        logging.warning("Using MC assumption for volumes.")
        return self.calculate_for_sample_set_region_mc(s_set, region)

    # Setup pointers from emulated points to samples.
    ptr1 = disc._emulated_ii_ptr_local
    ptr3 = disc_new_set._emulated_ii_ptr_local

    # Check which emulated points fall in the region.
    in_A = marker[ptr3]

    # Loop over contour events and add error contribution.
    er_est = 0.0
    ops_num = self.disc._output_probability_set.check_num()
    for i in range(ops_num):
        if self.disc._output_probability_set._probabilities[i] > 0.0:
            # JiA, Ji, Jie, and JiAe are defined as in
            # `Butler et al. 2015. <http://arxiv.org/pdf/1407.3851>`_
            indices1 = np.equal(self.disc._io_ptr, i)
            in_Ai1 = indices1[ptr1]
            indices2 = np.equal(self.disc_new._io_ptr, i)
            in_Ai2 = indices2[ptr1]
            JiA_local = float(np.sum(np.logical_and(in_A, in_Ai1)))
            JiA = comm.allreduce(JiA_local, op=MPI.SUM)
            Ji_local = float(np.sum(in_Ai1))
            Ji = comm.allreduce(Ji_local, op=MPI.SUM)
            JiAe_local = float(np.sum(np.logical_and(in_A, in_Ai2)))
            JiAe = comm.allreduce(JiAe_local, op=MPI.SUM)
            Jie_local = float(np.sum(in_Ai2))
            Jie = comm.allreduce(Jie_local, op=MPI.SUM)
            if Ji * Jie == 0.0:
                # FIX: previously an unguarded division raised
                # ZeroDivisionError when a contour event contained no
                # emulated points.  Match the convention used by
                # calculate_for_sample_set_region_mc and treat the
                # contribution as infinite (estimate undefined).
                er_est += np.inf
            else:
                er_est += self.disc._output_probability_set.\
                    _probabilities[i] * ((JiA*Jie - JiAe*Ji)/(Ji*Jie))
    return er_est
def calculate_for_sample_set_region_mc(self, s_set, region):
    """
    Calculate the numerical error estimate for a region of the input
    space defined by a sample set object, using the MC assumption.

    Side effect: overwrites
    ``self.disc._input_sample_set._error_id_local`` with the
    per-sample error-id contributions computed here.

    :param s_set: sample set for which to calculate error
    :type s_set: :class:`bet.sample.sample_set_base`
    :param region: region of s_set for which to calculate error
    :type region: int
    :rtype: float
    :returns: ``er_est``, the numerical error estimate for the region
        (``np.inf`` if some contour event contains no samples)
    """
    # Set up marker for members of s_set belonging to the region.
    if s_set._region is None:
        msg = "regions must be defined for the sample set."
        raise wrong_argument_type(msg)
    marker = np.equal(s_set._region, region)
    if not np.any(marker):
        msg = "The given region does not exist."
        raise wrong_argument_type(msg)

    # Use the input samples themselves as the emulation set (this is
    # the MC assumption: each sample carries equal volume weight).
    disc_new_set = samp.discretization(
        input_sample_set=s_set,
        output_sample_set=s_set,
        emulated_input_sample_set=self.disc._input_sample_set)
    disc_new_set.set_emulated_ii_ptr(globalize=False)

    # Check which (local) input samples land in the region.
    in_A = marker[disc_new_set._emulated_ii_ptr_local]

    # Loop over contour events and add error contribution; also track
    # per-sample error ids so callers can see where error concentrates.
    er_est = 0.0
    ops_num = self.disc._output_probability_set.check_num()
    num_local = self.disc._input_sample_set.check_num_local()
    self.disc._input_sample_set._error_id_local = np.zeros((num_local,))
    for i in range(ops_num):
        if self.disc._output_probability_set._probabilities[i] > 0.0:
            # JiA, Ji, Jie, and JiAe are defined as in
            # `Butler et al. 2015. <http://arxiv.org/pdf/1407.3851>`_
            # (counts here, since MC volumes are uniform).
            in_Ai1 = np.equal(self.disc._io_ptr_local, i)
            in_Ai2 = np.equal(self.disc_new._io_ptr_local, i)
            JiA_local = float(np.sum(np.logical_and(in_A, in_Ai1)))
            JiA = comm.allreduce(JiA_local, op=MPI.SUM)
            Ji_local = float(np.sum(in_Ai1))
            Ji = comm.allreduce(Ji_local, op=MPI.SUM)
            JiAe_local = float(np.sum(np.logical_and(in_A, in_Ai2)))
            JiAe = comm.allreduce(JiAe_local, op=MPI.SUM)
            Jie_local = float(np.sum(in_Ai2))
            Jie = comm.allreduce(Jie_local, op=MPI.SUM)
            # Empty contour event: contribution is undefined -> inf.
            if Ji*Jie == 0:
                er_cont = np.inf
            else:
                er_cont = self.disc._output_probability_set.\
                    _probabilities[i] * ((JiA*Jie - JiAe*Ji)/(Ji*Jie))
            er_est += er_cont
            # NOTE(review): error_cells1 and error_cells2 each require
            # both in_A and its negation for the same sample, so they
            # are identically False and the final mask reduces to
            # error_cells3 (samples whose io_ptr disagrees between disc
            # and disc_new).  Left as-is to preserve behavior; confirm
            # against the intended definition before simplifying.
            error_cells1 = np.logical_and(
                np.logical_and(in_Ai1, np.logical_not(in_A)),
                np.logical_and(in_Ai2, in_A))
            error_cells2 = np.logical_and(
                np.logical_and(in_Ai2, np.logical_not(in_A)),
                np.logical_and(in_Ai1, in_A))
            error_cells3 = np.not_equal(in_Ai1, in_Ai2)
            error_cells = np.logical_or(error_cells1, error_cells2)
            error_cells = np.logical_or(error_cells, error_cells3)
            # Spread this event's error contribution evenly over the
            # cells responsible for it (global count across ranks).
            error_cells_num_local = float(np.sum(error_cells))
            error_cells_num = comm.allreduce(error_cells_num_local,
                                             op=MPI.SUM)
            if error_cells_num != 0:
                self.disc._input_sample_set._error_id_local[error_cells] \
                    += er_cont/error_cells_num
    return er_est
def calculate_prob_for_sample_set_region(self, s_set, regions,
                                         update_input=True):
    """
    Solve the stochastic inverse problem based on surrogate points and
    the MC assumption, and calculate the probability of regions of the
    input space along with error estimates for those probabilities.

    NOTE(review): this definition appears to be a near-verbatim
    duplicate of an earlier ``calculate_prob_for_sample_set_region`` in
    this file; if both live in the same class, this later one silently
    shadows the earlier — confirm and deduplicate.

    :param s_set: sample set for which to calculate error
    :type s_set: :class:`bet.sample.sample_set_base`
    :param regions: list of regions of ``s_set`` for which to calculate
        error
    :type regions: list
    :param update_input: whether or not to update probabilities and
        error identifiers on the input discretization
    :type update_input: bool
    :rtype: tuple
    :returns: (``probabilities``, ``error_estimates``), the probability
        and error estimates for each region
    """
    # Guard: the surrogate must have been built before this is called.
    if not hasattr(self, 'surrogate_discretization'):
        msg = "surrogate discretization has not been created"
        raise calculateError.wrong_argument_type(msg)
    if not isinstance(s_set, sample.sample_set_base):
        msg = "s_set must be of type bet.sample.sample_set_base"
        raise calculateError.wrong_argument_type(msg)

    # Lazily estimate cell volumes (MC assumption) if not yet computed,
    # then solve the inverse problem on the surrogate discretization.
    if self.surrogate_discretization._input_sample_set._volumes_local\
            is None:
        self.surrogate_discretization._input_sample_set.\
            estimate_volume_mc(globalize=False)
    calculateP.prob(self.surrogate_discretization, globalize=False)
    # Transfer probabilities from the surrogate samples onto s_set.
    prob_new_values = calculateP.prob_from_sample_set(
        self.surrogate_discretization._input_sample_set, s_set)

    # Accumulate probability and error estimate per requested region.
    probabilities = []
    error_estimates = []
    for region in regions:
        marker = np.equal(s_set._region, region)
        probability = np.sum(prob_new_values[marker])
        # Calculate error estimate for this region. NOTE: this mutates
        # the surrogate input set's _error_id_local as a side effect.
        model_error = calculateError.model_error(
            self.surrogate_discretization)
        error_estimate = model_error.calculate_for_sample_set_region_mc(
            s_set, region)
        probabilities.append(probability)
        error_estimates.append(error_estimate)

    # Optionally fold probabilities/error ids back onto the original
    # input discretization by summing over surrogate points that map to
    # each input cell (MPI-reduced across ranks).
    if update_input:
        num = self.input_disc._input_sample_set.check_num()
        prob = np.zeros((num, ))
        error_id = np.zeros((num, ))
        for i in range(num):
            # Surrogate points (local to this rank) belonging to cell i.
            Itemp = np.equal(self.dummy_disc._emulated_ii_ptr_local, i)
            prob_sum = np.sum(self.surrogate_discretization.
                              _input_sample_set._probabilities_local[Itemp])
            prob[i] = comm.allreduce(prob_sum, op=MPI.SUM)
            error_id_sum = np.sum(self.surrogate_discretization.
                                  _input_sample_set._error_id_local[Itemp])
            error_id[i] = comm.allreduce(error_id_sum, op=MPI.SUM)
        self.input_disc._input_sample_set.set_probabilities(prob)
        self.input_disc._input_sample_set.set_error_id(error_id)
    return (probabilities, error_estimates)
def prob_from_sample_set_with_emulated_volumes(set_old, set_new,
                                               set_emulate=None):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples_new}})`
    from :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples_old}})` using a
    set of emulated points distributed with respect to the volume
    measure.  Probability mass is pushed from each old cell onto the
    emulated points it contains, then gathered into the new cells.

    :param set_old: Sample set on which probabilities have already been
        calculated
    :type set_old: :class:`~bet.sample.sample_set_base`
    :param set_new: Sample set for which probabilities will be
        calculated.
    :type set_new: :class:`~bet.sample.sample_set_base`
    :param set_emulate: Sample set for volume emulation
    :type set_emulate: :class:`~bet.sample.sample_set_base`
    """
    # Without emulated points, fall back to the MC-assumption transfer.
    if set_emulate is None:
        logging.warning("Using MC assumption because no emulated points given")
        return prob_from_sample_set(set_old, set_new)

    # Validate sizes and matching dimensions.
    num_old = set_old.check_num()
    num_new = set_new.check_num()
    set_emulate.check_num()
    dims_differ = (set_old._dim != set_new._dim) \
        or (set_old._dim != set_emulate._dim)
    if dims_differ:
        raise samp.dim_not_matching("Dimensions of sets are not equal.")

    # Ensure the emulated points are distributed across ranks.
    if set_emulate._values_local is None:
        set_emulate.global_to_local()

    # Nearest-cell pointers from each local emulated point into the old
    # and new sets.
    _, old_ptr = set_old.query(set_emulate._values_local)
    _, new_ptr = set_new.query(set_emulate._values_local)
    old_ptr = old_ptr.flat[:]
    new_ptr = new_ptr.flat[:]

    # Probability carried by each new cell / each local emulated point.
    prob_new = np.zeros((num_new,))
    prob_em = np.zeros((len(old_ptr),))

    # Split each old cell's probability evenly among the emulated points
    # inside it (counts are MPI-global so every rank divides alike).
    saw_empty_cell = False
    for idx in range(num_old):
        if set_old._probabilities[idx] > 0.0:
            mask = np.equal(old_ptr, idx)
            count = np.sum(mask)
            count = comm.allreduce(count, op=MPI.SUM)
            if count > 0:
                prob_em[mask] += set_old._probabilities[idx]/float(count)
            else:
                saw_empty_cell = True

    # If any positive-probability old cell held no emulated points, the
    # emulated mass no longer sums to one; renormalize globally.
    if saw_empty_cell:
        logging.warning("Some old cells have no emulated points in them. "
                        "Renormalizing probability.")
        total_prob = np.sum(prob_em)
        total_prob = comm.allreduce(total_prob, op=MPI.SUM)
        prob_em = prob_em/total_prob

    # Gather emulated-point mass into each new cell (MPI-global sum).
    for idx in range(num_new):
        mask = np.equal(new_ptr, idx)
        cell_mass = np.sum(prob_em[mask])
        cell_mass = comm.allreduce(cell_mass, op=MPI.SUM)
        prob_new[idx] = cell_mass

    # Store and return the new probabilities.
    set_new.set_probabilities(prob_new)
    return prob_new