def compare_get_global_values(i, provide_shape):
    """
    Compares the results of ``get_global_values`` for a vector of shape
    ``(comm.size*2, i)``.

    :param int i: Dimension of the vector of length ``comm.size*2``

    """
    if comm.rank == 0:
        if i == 0:
            original_array = np.array(np.random.random((comm.size * 2, )))
        else:
            original_array = np.array(np.random.random((comm.size * 2, i)))
    else:
        original_array = None
    original_array = comm.bcast(original_array)
    my_len = original_array.shape[0] / comm.size
    my_index = range(0 + comm.rank * my_len, (comm.rank + 1) * my_len)
    if i == 0:
        my_array = original_array[my_index]
    else:
        my_array = original_array[my_index, :]
    if provide_shape:
        recomposed_array = util.get_global_values(my_array,
                original_array.shape)
    else:
        recomposed_array = util.get_global_values(my_array)
    nptest.assert_array_equal(original_array, recomposed_array)

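# A minimal standalone sketch of what the test above exercises, assuming the
# module paths ``bet.util`` and ``bet.Comm`` from the BET package (the test
# itself only shows the ``util`` and ``comm`` names); run it under mpirun
# with any number of ranks. With the optional shape argument the global
# array is assembled into that shape; without it the shape is inferred from
# the local pieces gathered in rank order.
import numpy as np
from bet.Comm import comm
import bet.util as util

local_part = float(comm.rank) * np.ones((2, 3))  # two rows owned per rank
global_all = util.get_global_values(local_part)
assert global_all.shape == (2 * comm.size, 3)
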
def prob(samples, data, rho_D_M, d_distr_samples, d_Tree=None):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples}})`, the
    probability associated with a set of Voronoi cells defined by the model
    solves at :math:`(\lambda_{samples})` where the volumes of these Voronoi
    cells are assumed to be equal under the MC assumption.

    :param samples: The samples in parameter space for which the model was
        run.
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim)
    :param data: The data from running the model given the samples.
    :type data: :class:`~numpy.ndarray` of shape (num_samples, mdim)
    :param rho_D_M: The simple function approximation of rho_D
    :type rho_D_M: :class:`~numpy.ndarray` of shape (M,)
    :param d_distr_samples: The samples in the data space that define a
        partition of D for the simple function approximation
    :type d_distr_samples: :class:`~numpy.ndarray` of shape (M, mdim)
    :param d_Tree: :class:`~scipy.spatial.KDTree` for d_distr_samples
    :rtype: tuple of :class:`~numpy.ndarray` of shapes (num_samples,),
        (num_samples,), (num_samples,)
    :returns: (P, lam_vol, io_ptr) where P is the probability associated
        with samples, lam_vol the volumes associated with the samples, and
        io_ptr a pointer from data to M bins.

    """
    if len(samples.shape) == 1:
        samples = np.expand_dims(samples, axis=1)
    if len(data.shape) == 1:
        data = np.expand_dims(data, axis=1)
    if len(d_distr_samples.shape) == 1:
        d_distr_samples = np.expand_dims(d_distr_samples, axis=1)
    if d_Tree is None:
        d_Tree = spatial.KDTree(d_distr_samples)

    # Set up local arrays for parallelism
    local_index = range(0 + comm.rank, samples.shape[0], comm.size)
    samples_local = samples[local_index, :]
    data_local = data[local_index, :]
    local_array = np.array(local_index, dtype='int64')

    # Determine which inputs go to which M bins using the QoI
    (_, io_ptr) = d_Tree.query(data_local)

    # Apply the standard MC approximation and calculate probabilities
    P_local = np.zeros((samples_local.shape[0],))
    for i in range(rho_D_M.shape[0]):
        Itemp = np.equal(io_ptr, i)
        Itemp_sum = np.sum(Itemp)
        Itemp_sum = comm.allreduce(Itemp_sum, op=MPI.SUM)
        if Itemp_sum > 0:
            P_local[Itemp] = rho_D_M[i] / Itemp_sum
    P_global = util.get_global_values(P_local)
    global_index = util.get_global_values(local_array)
    P = np.zeros(P_global.shape)
    P[global_index] = P_global[:]

    lam_vol = (1.0 / float(samples.shape[0])) * np.ones((samples.shape[0],))

    return (P, lam_vol, io_ptr)

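# A serial usage sketch for ``prob`` (single MPI rank), assuming the module
# layout of the original BET package (``bet.calculateP.calculateP``); the toy
# 1-to-1 QoI map and bin layout below are illustrative only. The
# deterministic data cover every bin, so the returned probabilities sum to
# one.
import numpy as np
import bet.calculateP.calculateP as calcP

samples = np.linspace(0.0, 1.0, 100).reshape(-1, 1)  # 1-D input space
data = 2.0 * samples                                 # linear QoI onto [0, 2]
d_distr_samples = np.linspace(0.0, 2.0, 10).reshape(-1, 1)
rho_D_M = np.ones((10,)) / 10.0           # uniform simple-function weights
(P, lam_vol, io_ptr) = calcP.prob(samples, data, rho_D_M, d_distr_samples)
assert abs(P.sum() - 1.0) < 1e-8
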
def user_samples(self, samples, savefile, parallel=False):
    """
    Samples the model at ``samples`` and saves the results.

    Note: There are many ways to generate samples on a regular grid in
    Numpy and other Python packages. Instead of reimplementing them here we
    provide a sampler that utilizes user specified samples.

    :param samples: samples to evaluate the model at
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim)
    :param string savefile: filename to save samples and data
    :param bool parallel: Flag for parallel implementation. Uses lowercase
        ``mpi4py`` methods if ``samples.shape[0]`` is not divisible by
        ``size``. Default value is ``False``.
    :rtype: tuple
    :returns: (``parameter_samples``, ``data_samples``) where
        ``parameter_samples`` is np.ndarray of shape (num_samples, ndim)
        and ``data_samples`` is np.ndarray of shape (num_samples, mdim)

    """
    # Update the number of samples
    self.num_samples = samples.shape[0]

    # Solve the model at the samples
    if not parallel or comm.size == 1:
        data = self.lb_model(samples)
    elif parallel:
        my_len = self.num_samples / comm.size
        if comm.rank != comm.size - 1:
            my_index = range(0 + comm.rank * my_len,
                    (comm.rank + 1) * my_len)
        else:
            my_index = range(0 + comm.rank * my_len, self.num_samples)
        if len(samples.shape) == 1:
            my_samples = samples[my_index]
        else:
            my_samples = samples[my_index, :]
        my_data = self.lb_model(my_samples)
        data = util.get_global_values(my_data)
        samples = util.get_global_values(my_samples)

    # if data or samples are of shape (num_samples,) expand dimensions
    if len(samples.shape) == 1:
        samples = np.expand_dims(samples, axis=1)
    if len(data.shape) == 1:
        data = np.expand_dims(data, axis=1)

    mdat = dict()
    self.update_mdict(mdat)
    mdat['samples'] = samples
    mdat['data'] = data

    if comm.rank == 0:
        self.save(mdat, savefile)

    return (samples, data)

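# Hedged driver for ``user_samples``: it assumes the ``sampler`` class from
# BET's ``bet.sampling.basicSampling`` whose constructor takes the model
# callable (check the exact constructor signature against your copy). The
# model here is a toy sum-of-coordinates QoI, and 'user_run.mat' is an
# arbitrary output name.
import numpy as np
from bet.sampling.basicSampling import sampler

def lb_model(input_samples):
    # toy QoI map: sum the input coordinates of each sample
    return np.sum(input_samples, axis=1)

my_sampler = sampler(lb_model)
user_points = np.random.random((64, 3))
(parameter_samples, data_samples) = my_sampler.user_samples(user_points,
        'user_run.mat')
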
def globalize_ptrs(self):
    r"""
    Globalizes comparison pointers by calling ``get_global_values`` for
    both the left and right sample sets.

    """
    if (self._ptr_left_local is not None) and\
            (self._ptr_left is None):
        self._ptr_left = util.get_global_values(
            self._ptr_left_local)
    if (self._ptr_right_local is not None) and\
            (self._ptr_right is None):
        self._ptr_right = util.get_global_values(
            self._ptr_right_local)

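# The globalize pattern above in isolation (a sketch using only the
# in-document API, with the same assumed module paths as earlier): each rank
# holds a local slice of pointer indices, and ``get_global_values``
# concatenates the slices in rank order, so the global pointer array lines
# up with the globally ordered comparison values.
import numpy as np
from bet.Comm import comm
import bet.util as util

ptr_local = np.arange(3) + 3 * comm.rank  # this rank's pointer slice
ptr_global = util.get_global_values(ptr_local)
# on every rank ptr_global is [0, 1, ..., 3*comm.size - 1]
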
def setUp(self):
    """
    Set up problem.
    """
    super(Test_prob_emulated_1to1, self).setUp()
    (self.P_emulate, self.lambda_emulate, _, _) =\
            calcP.prob_emulated(samples=self.samples,
            data=self.data, rho_D_M=self.d_distr_prob,
            d_distr_samples=self.d_distr_samples,
            lambda_emulate=self.lambda_emulate, d_Tree=self.d_Tree)
    self.P_emulate = util.get_global_values(self.P_emulate)

def setUp(self):
    """
    Set up problem.
    """
    super(Test_prob_emulated_3to1, self).setUp()
    (self.P_emulate, self.lambda_emulate, _, _) =\
            calcP.prob_emulated(samples=self.samples,
            data=self.data, rho_D_M=self.d_distr_prob,
            d_distr_samples=self.d_distr_samples,
            lambda_emulate=self.lambda_emulate, d_Tree=self.d_Tree)
    self.P_emulate_ref = np.loadtxt(data_path + "/3to1_prob_emulated.txt.gz")
    self.P_emulate = util.get_global_values(self.P_emulate)

def postprocess(station_nums, ref_num):

    filename = 'P_q' + str(station_nums[0] + 1) + '_q' + \
            str(station_nums[1] + 1)
    if len(station_nums) == 3:
        filename += '_q' + str(station_nums[2] + 1)
    filename += '_truth_' + str(ref_num + 1)

    data = Q[:, station_nums]
    q_ref = Q_ref[ref_num, station_nums]

    # Create simple function approximation
    # Save points used to partition D for the simple function approximation
    # and the approximation itself (this can be used to make close
    # comparisons...)
    (rho_D_M, d_distr_samples, d_Tree) = sfun.uniform_hyperrectangle(data,
            q_ref, bin_ratio=0.15,
            center_pts_per_edge=np.ones((data.shape[1],)))

    num_l_emulate = 1e6
    lambda_emulate = calcP.emulate_iid_lebesgue(lam_domain, num_l_emulate)
    print "Finished emulating lambda samples"

    # Calculate P on the actual samples estimating Voronoi cell volume with
    # MC integration
    print "Calculating prob_mc"
    (P3, lam_vol3, lambda_emulate3, io_ptr3, emulate_ptr3) = calcP.prob_mc(
            samples, data, rho_D_M, d_distr_samples, lam_domain,
            lambda_emulate, d_Tree)

    mdict = dict()
    mdict['rho_D_M'] = rho_D_M
    mdict['d_distr_samples'] = d_distr_samples
    mdict['lambda_emulate'] = util.get_global_values(lambda_emulate)
    mdict['num_l_emulate'] = mdict['lambda_emulate'].shape[1]
    mdict['P3'] = util.get_global_values(P3)
    mdict['lam_vol3'] = util.get_global_values(lam_vol3)
    mdict['io_ptr3'] = util.get_global_values(io_ptr3)
    mdict['emulate_ptr3'] = emulate_ptr3

    if rank == 0:
        # Export P and compare to MATLAB solution visually
        sio.savemat(filename, mdict, do_compression=True)

def my_model(io_file_name):
    # read in input from file
    io_mdat = sio.loadmat(io_file_name)
    input = io_mdat['input']
    # localize input
    input_local = np.array_split(input, comm.size)[comm.rank]
    # model is y = x[:, 0:dim/2] + x[:, dim/2:]
    output_local = sum(np.split(input_local, 2, 1))
    # save output to file
    io_mdat['output'] = util.get_global_values(output_local)
    comm.barrier()
    if comm.rank == 0:
        sio.savemat(io_file_name, io_mdat)

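# Sketch of driving ``my_model`` through its .mat file interface. The file
# name is arbitrary, the ``'input'`` key matches the function above, and the
# script is assumed to be launched on every rank (e.g. via mpirun); the
# ``bet.Comm`` import path is an assumption as before.
import numpy as np
import scipy.io as sio
from bet.Comm import comm

io_file_name = 'io_exchange.mat'
if comm.rank == 0:
    # 8 samples of a 4-dimensional input, so the output has shape (8, 2)
    sio.savemat(io_file_name, {'input': np.random.random((8, 4))})
comm.barrier()
my_model(io_file_name)
if comm.rank == 0:
    output = sio.loadmat(io_file_name)['output']
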
def set_ptr_right(self, globalize=True):
    """
    Creates the pointer from ``self._comparison_sample_set`` to
    ``self._right_sample_set``

    .. seealso::

        :meth:`scipy.spatial.KDTree.query`

    :param bool globalize: flag whether or not to globalize
        ``self._ptr_right``

    """
    if self._comparison_sample_set._values_local is None:
        self._comparison_sample_set.global_to_local()

    (_, self._ptr_right_local) = self._right_sample_set.query(
        self._comparison_sample_set._values_local)

    if globalize:
        self._ptr_right = util.get_global_values(
            self._ptr_right_local)
    assert self._right_sample_set.check_num() >= max(self._ptr_right_local)

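# The pointer query at the heart of ``set_ptr_right``, in isolation:
# nearest-neighbor indices from local comparison points into the right set's
# values via scipy's KDTree (the sample set ``query`` method above is
# assumed to wrap an equivalent call). The array shapes are illustrative.
import numpy as np
import scipy.spatial as spatial

right_values = np.random.random((50, 2))
comparison_values_local = np.random.random((20, 2))
(_, ptr_right_local) = spatial.KDTree(right_values).query(
        comparison_values_local)
assert ptr_right_local.max() < right_values.shape[0]
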
def generalized_chains(self, param_min, param_max, t_set, kern,
        savefile, initial_sample_type="lhs", criterion='center'):
    """
    Basic adaptive sampling algorithm using generalized chains.

    :param string initial_sample_type: type of initial sample random (or r),
        latin hypercube(lhs), or space-filling curve(TBD)
    :param param_min: minimum value for each parameter dimension
    :type param_min: :class:`numpy.ndarray` (ndim,)
    :param param_max: maximum value for each parameter dimension
    :type param_max: :class:`numpy.ndarray` (ndim,)
    :param t_set: method for creating new parameter steps using a given step
        size based on the parameter domain size
    :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set`
    :param kern: functional that acts on the data used to determine the
        proposed change to the ``step_size``
    :type kern: :class:`~bet.sampling.adaptiveSampling.kernel` object.
    :param string savefile: filename to save samples and data
    :param string criterion: latin hypercube criterion see
        `PyDOE <http://pythonhosted.org/pyDOE/randomized.html>`_
    :rtype: tuple
    :returns: (``parameter_samples``, ``data_samples``,
        ``all_step_ratios``) where ``parameter_samples`` is np.ndarray of
        shape (num_samples, ndim), ``data_samples`` is np.ndarray of shape
        (num_samples, mdim), and ``all_step_ratios`` is np.ndarray of shape
        (num_chains, chain_length)

    """
    if comm.size > 1:
        psavefile = os.path.join(os.path.dirname(savefile),
                "proc{}{}".format(comm.rank, os.path.basename(savefile)))

    # Initialize Nx1 vector Step_size = something reasonable (based on size
    # of domain and transition set type)
    # Calculate domain size
    param_left = np.repeat([param_min], self.num_chains_pproc, 0)
    param_right = np.repeat([param_max], self.num_chains_pproc, 0)
    param_width = param_right - param_left
    # Calculate step_size
    max_ratio = t_set.max_ratio
    min_ratio = t_set.min_ratio
    step_ratio = t_set.init_ratio * np.ones(self.num_chains_pproc)

    # Initialize the first batch of N samples (maybe taken from a latin
    # hypercube/space-filling curve to fully explore parameter space - not
    # necessarily random). Call these samples_old.
    (samples_old, data_old) = super(sampler, self).random_samples(
            initial_sample_type, param_min, param_max, savefile,
            self.num_chains, criterion)
    self.num_samples = self.chain_length * self.num_chains
    comm.Barrier()

    # now split it all up
    if comm.size > 1:
        MYsamples_old = np.empty((np.shape(samples_old)[0] / comm.size,
                np.shape(samples_old)[1]))
        comm.Scatter([samples_old, MPI.DOUBLE],
                [MYsamples_old, MPI.DOUBLE])
        MYdata_old = np.empty((np.shape(data_old)[0] / comm.size,
                np.shape(data_old)[1]))
        comm.Scatter([data_old, MPI.DOUBLE], [MYdata_old, MPI.DOUBLE])
    else:
        MYsamples_old = np.copy(samples_old)
        MYdata_old = np.copy(data_old)

    samples = MYsamples_old
    data = MYdata_old
    all_step_ratios = step_ratio
    (kern_old, proposal) = kern.delta_step(MYdata_old, None)
    mdat = dict()
    self.update_mdict(mdat)

    for batch in xrange(1, self.chain_length):
        # For each of N samples_old, create N new parameter samples using
        # transition set and step_ratio. Call these samples samples_new.
        samples_new = t_set.step(step_ratio, param_width, param_left,
                param_right, MYsamples_old)

        # Solve the model for the samples_new.
        data_new = self.lb_model(samples_new)

        # Make some decision about changing step_size(k). There are
        # multiple ways to do this.
        # Determine step size
        (kern_old, proposal) = kern.delta_step(data_new, kern_old)
        step_ratio = proposal * step_ratio
        # Is the ratio greater than max?
        step_ratio[step_ratio > max_ratio] = max_ratio
        # Is the ratio less than min?
        step_ratio[step_ratio < min_ratio] = min_ratio

        # Save and export concatenated arrays
        if self.chain_length < 4:
            pass
        elif (batch + 1) % (self.chain_length / 4) == 0:
            print "Current chain length: " + str(batch + 1) + "/" + \
                    str(self.chain_length)
        samples = np.concatenate((samples, samples_new))
        data = np.concatenate((data, data_new))
        all_step_ratios = np.concatenate((all_step_ratios, step_ratio))
        mdat['step_ratios'] = all_step_ratios
        mdat['samples'] = samples
        mdat['data'] = data
        if comm.size > 1:
            super(sampler, self).save(mdat, psavefile)
        else:
            super(sampler, self).save(mdat, savefile)
        MYsamples_old = samples_new

    # collect everything
    MYsamples = np.copy(samples)
    MYdata = np.copy(data)
    MYall_step_ratios = np.copy(all_step_ratios)
    # ``parameter_samples`` is np.ndarray of shape (num_samples, ndim)
    samples = util.get_global_values(MYsamples,
            shape=(self.num_samples, np.shape(MYsamples)[1]))
    # and ``data_samples`` is np.ndarray of shape (num_samples, mdim)
    data = util.get_global_values(MYdata,
            shape=(self.num_samples, np.shape(MYdata)[1]))
    # ``all_step_ratios`` is np.ndarray of shape (num_chains, chain_length)
    all_step_ratios = util.get_global_values(MYall_step_ratios,
            shape=(self.num_samples,))
    all_step_ratios = np.reshape(all_step_ratios, (self.num_chains,
            self.chain_length))

    # save everything
    mdat['step_ratios'] = all_step_ratios
    mdat['samples'] = samples
    mdat['data'] = data
    super(sampler, self).save(mdat, savefile)

    return (samples, data, all_step_ratios)

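# Hedged usage sketch for the ``param_min``/``param_max`` flavor of
# ``generalized_chains`` above. The ``transition_set``, ``rhoD_kernel``, and
# ``sampler`` constructors follow BET's ``bet.sampling.adaptiveSampling``
# module; treat the exact argument lists as assumptions and check them
# against your copy. The QoI map and target density are toy examples.
import numpy as np
import bet.sampling.adaptiveSampling as asam

def lb_model(input_samples):
    # toy 1-D QoI: sum of the two input coordinates
    return np.sum(input_samples, axis=1, keepdims=True)

def rho_D(outputs):
    # indicator-style target density on the output interval (0.8, 1.2)
    inside = np.logical_and(outputs[:, 0] > 0.8, outputs[:, 0] < 1.2)
    return inside.astype(float)

t_set = asam.transition_set(0.5, 0.5 ** 5, 1.0)  # (init, min, max) ratios
kern = asam.rhoD_kernel(1.0, rho_D)
my_sampler = asam.sampler(80, 10, lb_model)  # 80 samples, chains of length 10
(samples, data, all_step_ratios) = my_sampler.generalized_chains(
        np.zeros((2,)), np.ones((2,)), t_set, kern, 'adaptive_run.mat')
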
def generalized_chains(self, param_min, param_max, t_set, kern,
        savefile, initial_sample_type="random", criterion='center',
        hot_start=0):
    """
    Basic adaptive sampling algorithm using generalized chains.

    :param string initial_sample_type: type of initial sample random (or r),
        latin hypercube(lhs), or space-filling curve(TBD)
    :param param_min: minimum value for each parameter dimension
    :type param_min: :class:`numpy.ndarray` (ndim,)
    :param param_max: maximum value for each parameter dimension
    :type param_max: :class:`numpy.ndarray` (ndim,)
    :param t_set: method for creating new parameter steps using a given step
        size based on the parameter domain size
    :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set`
    :param kern: functional that acts on the data used to determine the
        proposed change to the ``step_size``
    :type kern: :class:`~bet.sampling.adaptiveSampling.kernel` object.
    :param string savefile: filename to save samples and data
    :param int hot_start: Flag whether or not hot start the sampling chains
        from a previous set of chains. Note that ``num_chains`` must be the
        same, but ``num_chains_pproc`` need not be the same. 0 - cold
        start, 1 - hot start from uncompleted run, 2 - hot start from
        finished run
    :param string criterion: latin hypercube criterion see
        `PyDOE <http://pythonhosted.org/pyDOE/randomized.html>`_
    :rtype: tuple
    :returns: (``parameter_samples``, ``data_samples``,
        ``all_step_ratios``) where ``parameter_samples`` is np.ndarray of
        shape (num_samples, ndim), ``data_samples`` is np.ndarray of shape
        (num_samples, mdim), and ``all_step_ratios`` is np.ndarray of shape
        (num_chains, chain_length)

    """
    if comm.size > 1:
        psavefile = os.path.join(os.path.dirname(savefile),
                "proc{}_{}".format(comm.rank, os.path.basename(savefile)))

    # Initialize Nx1 vector Step_size = something reasonable (based on size
    # of domain and transition set type)
    # Calculate domain size
    param_left = np.repeat([param_min], self.num_chains_pproc, 0)
    param_right = np.repeat([param_max], self.num_chains_pproc, 0)
    param_width = param_right - param_left
    # Calculate step_size
    max_ratio = t_set.max_ratio
    min_ratio = t_set.min_ratio

    if not hot_start:
        step_ratio = t_set.init_ratio * np.ones(self.num_chains_pproc)
        # Initialize the first batch of N samples (maybe taken from a latin
        # hypercube/space-filling curve to fully explore parameter space -
        # not necessarily random). Call these samples_old.
        (samples_old, data_old) = super(sampler, self).random_samples(
                initial_sample_type, param_min, param_max, savefile,
                self.num_chains, criterion)
        self.num_samples = self.chain_length * self.num_chains
        comm.Barrier()

        # now split it all up
        if comm.size > 1:
            MYsamples_old = np.empty((np.shape(samples_old)[0] / comm.size,
                    np.shape(samples_old)[1]))
            comm.Scatter([samples_old, MPI.DOUBLE],
                    [MYsamples_old, MPI.DOUBLE])
            MYdata_old = np.empty((np.shape(data_old)[0] / comm.size,
                    np.shape(data_old)[1]))
            comm.Scatter([data_old, MPI.DOUBLE], [MYdata_old, MPI.DOUBLE])
        else:
            MYsamples_old = np.copy(samples_old)
            MYdata_old = np.copy(data_old)

        samples = MYsamples_old
        data = MYdata_old
        all_step_ratios = step_ratio
        (kern_old, proposal) = kern.delta_step(MYdata_old, None)
        start_ind = 1

    if hot_start:
        # LOAD FILES
        if hot_start == 1:  # HOT START FROM PARTIAL RUN
            if comm.rank == 0:
                print "HOT START from partial run"
            # Find and open save files
            save_dir = os.path.dirname(savefile)
            base_name = os.path.basename(savefile)
            mdat_files = glob.glob(os.path.join(save_dir,
                    "proc*_{}".format(base_name)))
            if len(mdat_files) == 0:
                print "HOT START using serial file"
                mdat = sio.loadmat(savefile)
                samples = mdat['samples']
                data = mdat['data']
                kern_old = np.squeeze(mdat['kern_old'])
                all_step_ratios = np.squeeze(mdat['step_ratios'])
                chain_length = samples.shape[0] / self.num_chains
                if all_step_ratios.shape == (self.num_chains,
                        chain_length):
                    print "Serial file, from completed run updating hot_start"
                    hot_start = 2
                # reshape if parallel
                if comm.size > 1:
                    samples = np.reshape(samples, (self.num_chains,
                            chain_length, -1), 'F')
                    data = np.reshape(data, (self.num_chains,
                            chain_length, -1), 'F')
                    all_step_ratios = np.reshape(all_step_ratios,
                            (self.num_chains, -1), 'F')
            elif hot_start == 1 and len(mdat_files) == comm.size:
                print "HOT START using parallel files (same nproc)"
                # if the number of processors is the same then set mdat to
                # be the one with the matching processor number (doesn't
                # really matter)
                mdat = sio.loadmat(mdat_files[comm.rank])
                samples = mdat['samples']
                data = mdat['data']
                kern_old = np.squeeze(mdat['kern_old'])
                all_step_ratios = np.squeeze(mdat['step_ratios'])
            elif hot_start == 1 and len(mdat_files) != comm.size:
                print "HOT START using parallel files (diff nproc)"
                # Determine how many processors the previous data used
                # otherwise gather the data from mdat and then scatter
                # among the processors and update mdat
                mdat_files_local = comm.scatter(mdat_files)
                mdat_local = [sio.loadmat(m) for m in mdat_files_local]
                mdat_list = comm.allgather(mdat_local)
                mdat_global = []
                # instead of a list of lists, create a list of mdat
                for mlist in mdat_list:
                    mdat_global.extend(mlist)
                # get num_proc and num_chains_pproc for previous run
                old_num_proc = max((len(mdat_list), 1))
                old_num_chains_pproc = self.num_chains / old_num_proc
                # get batch size and/or number of dimensions
                chain_length = mdat_global[0]['samples'].shape[0] / \
                        old_num_chains_pproc
                # create lists of local data
                samples = []
                data = []
                all_step_ratios = []
                kern_old = []
                # RESHAPE old_num_chains_pproc, chain_length(or batch), dim
                for mdat in mdat_global:
                    samples.append(np.reshape(mdat['samples'],
                            (old_num_chains_pproc, chain_length, -1), 'F'))
                    data.append(np.reshape(mdat['data'],
                            (old_num_chains_pproc, chain_length, -1), 'F'))
                    all_step_ratios.append(np.reshape(mdat['step_ratios'],
                            (old_num_chains_pproc, chain_length, -1), 'F'))
                    kern_old.append(np.reshape(mdat['kern_old'],
                            (old_num_chains_pproc,), 'F'))
                # turn into arrays
                samples = np.concatenate(samples)
                data = np.concatenate(data)
                all_step_ratios = np.concatenate(all_step_ratios)
                kern_old = np.concatenate(kern_old)
        if hot_start == 2:  # HOT START FROM COMPLETED RUN
            if comm.rank == 0:
                print "HOT START from completed run"
            mdat = sio.loadmat(savefile)
            samples = mdat['samples']
            data = mdat['data']
            kern_old = np.squeeze(mdat['kern_old'])
            all_step_ratios = np.squeeze(mdat['step_ratios'])
            chain_length = samples.shape[0] / self.num_chains
            mdat_files = []
            # reshape if parallel
            if comm.size > 1:
                samples = np.reshape(samples, (self.num_chains,
                        chain_length, -1), 'F')
                data = np.reshape(data, (self.num_chains,
                        chain_length, -1), 'F')
                all_step_ratios = np.reshape(all_step_ratios,
                        (self.num_chains, chain_length), 'F')
        # SPLIT DATA IF NECESSARY
        if comm.size > 1 and (hot_start == 2 or (hot_start == 1 and
                len(mdat_files) != comm.size)):
            # Use split to split along num_chains
            samples = np.reshape(np.split(samples, comm.size,
                    0)[comm.rank], (self.num_chains_pproc * chain_length,
                    -1), 'F')
            data = np.reshape(np.split(data, comm.size, 0)[comm.rank],
                    (self.num_chains_pproc * chain_length, -1), 'F')
            all_step_ratios = np.reshape(np.split(all_step_ratios,
                    comm.size, 0)[comm.rank],
                    (self.num_chains_pproc * chain_length,), 'F')
            kern_old = np.reshape(np.split(kern_old, comm.size,
                    0)[comm.rank], (self.num_chains_pproc,), 'F')
        else:
            all_step_ratios = np.reshape(all_step_ratios, (-1,), 'F')
        # Set samples, data, all_step_ratios, mdat, step_ratio,
        # MYsamples_old, and kern_old accordingly
        step_ratio = all_step_ratios[-self.num_chains_pproc:]
        MYsamples_old = samples[-self.num_chains_pproc:, :]
        # Determine how many batches have been run
        start_ind = samples.shape[0] / self.num_chains_pproc

    mdat = dict()
    self.update_mdict(mdat)

    for batch in xrange(start_ind, self.chain_length):
        # For each of N samples_old, create N new parameter samples using
        # transition set and step_ratio. Call these samples samples_new.
        samples_new = t_set.step(step_ratio, param_width, param_left,
                param_right, MYsamples_old)

        # Solve the model for the samples_new.
        data_new = self.lb_model(samples_new)

        # Make some decision about changing step_size(k). There are
        # multiple ways to do this.
        # Determine step size
        (kern_old, proposal) = kern.delta_step(data_new, kern_old)
        step_ratio = proposal * step_ratio
        # Is the ratio greater than max?
        step_ratio[step_ratio > max_ratio] = max_ratio
        # Is the ratio less than min?
        step_ratio[step_ratio < min_ratio] = min_ratio

        # Save and export concatenated arrays
        if self.chain_length < 4:
            pass
        elif comm.rank == 0 and (batch + 1) % (self.chain_length / 4) == 0:
            print "Current chain length: " + \
                    str(batch + 1) + "/" + str(self.chain_length)
        samples = np.concatenate((samples, samples_new))
        data = np.concatenate((data, data_new))
        all_step_ratios = np.concatenate((all_step_ratios, step_ratio))
        mdat['step_ratios'] = all_step_ratios
        mdat['samples'] = samples
        mdat['data'] = data
        mdat['kern_old'] = kern_old
        if comm.size > 1:
            super(sampler, self).save(mdat, psavefile)
        else:
            super(sampler, self).save(mdat, savefile)
        MYsamples_old = samples_new

    # collect everything
    MYsamples = np.copy(samples)
    MYdata = np.copy(data)
    MYall_step_ratios = np.copy(all_step_ratios)
    # ``parameter_samples`` is np.ndarray of shape (num_samples, ndim)
    samples = util.get_global_values(MYsamples,
            shape=(self.num_samples, np.shape(MYsamples)[1]))
    # and ``data_samples`` is np.ndarray of shape (num_samples, mdim)
    data = util.get_global_values(MYdata,
            shape=(self.num_samples, np.shape(MYdata)[1]))
    # ``all_step_ratios`` is np.ndarray of shape (num_chains, chain_length)
    all_step_ratios = util.get_global_values(MYall_step_ratios,
            shape=(self.num_samples,))
    all_step_ratios = np.reshape(all_step_ratios, (self.num_chains,
            self.chain_length), 'F')

    # save everything
    mdat['step_ratios'] = all_step_ratios
    mdat['samples'] = samples
    mdat['data'] = data
    mdat['kern_old'] = util.get_global_values(kern_old,
            shape=(self.num_chains,))
    super(sampler, self).save(mdat, savefile)

    return (samples, data, all_step_ratios)

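# Hedged resume sketch for the ``hot_start`` variant above, continuing the
# objects (my_sampler, t_set, kern) built in the earlier adaptive-sampling
# sketch: passing ``hot_start=1`` picks uncompleted chains back up from the
# 'adaptive_run.mat' save file. Per the docstring, ``num_chains`` must match
# the saved run, while the number of processors may differ.
(samples, data, all_step_ratios) = my_sampler.generalized_chains(
        np.zeros((2,)), np.ones((2,)), t_set, kern, 'adaptive_run.mat',
        hot_start=1)
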
def prob_mc(samples, data, rho_D_M, d_distr_samples,
        lambda_emulate=None, d_Tree=None):
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples}})`, the
    probability associated with a set of Voronoi cells defined by the model
    solves at :math:`(\lambda_{samples})` where the volumes of these
    Voronoi cells are approximated using MC integration.

    :param samples: The samples in parameter space for which the model was
        run.
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim)
    :param data: The data from running the model given the samples.
    :type data: :class:`~numpy.ndarray` of shape (num_samples, mdim)
    :param rho_D_M: The simple function approximation of rho_D
    :type rho_D_M: :class:`~numpy.ndarray` of shape (M,)
    :param d_distr_samples: The samples in the data space that define a
        partition of D for the simple function approximation
    :type d_distr_samples: :class:`~numpy.ndarray` of shape (M, mdim)
    :param d_Tree: :class:`~scipy.spatial.KDTree` for d_distr_samples
    :param lambda_emulate: Samples used to partition the parameter space
    :rtype: tuple of :class:`~numpy.ndarray` of shapes (num_samples,),
        (num_samples,), (num_l_emulate, ndim), (num_samples,),
        (num_l_emulate,)
    :returns: (P, lam_vol, lambda_emulate, io_ptr, emulate_ptr) where P is
        the probability associated with samples, lam_vol the volumes
        associated with the samples, io_ptr a pointer from data to M bins,
        and emulate_ptr a pointer from emulated samples to samples (in
        parameter space)

    """
    if len(samples.shape) == 1:
        samples = np.expand_dims(samples, axis=1)
    if len(data.shape) == 1:
        data = np.expand_dims(data, axis=1)
    if lambda_emulate is None:
        lambda_emulate = samples
    if len(d_distr_samples.shape) == 1:
        d_distr_samples = np.expand_dims(d_distr_samples, axis=1)
    if d_Tree is None:
        d_Tree = spatial.KDTree(d_distr_samples)

    # Determine which inputs go to which M bins using the QoI
    (_, io_ptr) = d_Tree.query(data)

    # Determine which emulated samples match with which model run samples
    l_Tree = spatial.KDTree(samples)
    (_, emulate_ptr) = l_Tree.query(lambda_emulate)

    # Apply the standard MC approximation to determine the number of
    # emulated samples per model run sample. This is for approximating
    # \mu_Lambda(A_i \intersect b_j)
    lam_vol = np.zeros((samples.shape[0],))
    for i in range(samples.shape[0]):
        lam_vol[i] = np.sum(np.equal(emulate_ptr, i))
    clam_vol = np.copy(lam_vol)
    comm.Allreduce([lam_vol, MPI.DOUBLE], [clam_vol, MPI.DOUBLE],
            op=MPI.SUM)
    lam_vol = clam_vol
    num_emulated = lambda_emulate.shape[0]
    num_emulated = comm.allreduce(num_emulated, op=MPI.SUM)
    lam_vol = lam_vol / num_emulated

    # Set up local arrays for parallelism
    local_index = range(0 + comm.rank, samples.shape[0], comm.size)
    samples_local = samples[local_index, :]
    data_local = data[local_index, :]
    lam_vol_local = lam_vol[local_index]
    local_array = np.array(local_index, dtype='int64')

    # Determine which inputs go to which M bins using the QoI
    (_, io_ptr_local) = d_Tree.query(data_local)

    # Calculate probabilities
    P_local = np.zeros((samples_local.shape[0],))
    for i in range(rho_D_M.shape[0]):
        Itemp = np.equal(io_ptr_local, i)
        Itemp_sum = np.sum(lam_vol_local[Itemp])
        Itemp_sum = comm.allreduce(Itemp_sum, op=MPI.SUM)
        if Itemp_sum > 0:
            P_local[Itemp] = rho_D_M[i] * lam_vol_local[Itemp] / Itemp_sum
    P_global = util.get_global_values(P_local)
    global_index = util.get_global_values(local_array)
    P = np.zeros(P_global.shape)
    P[global_index] = P_global[:]

    return (P, lam_vol, lambda_emulate, io_ptr, emulate_ptr)

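# Serial sketch for ``prob_mc`` mirroring the earlier ``prob`` example, with
# Voronoi volumes estimated from emulated samples instead of assumed equal.
# ``emulate_iid_lebesgue`` is called as in the postprocess script earlier in
# this collection; note that the script calls an older ``prob_mc`` variant
# that also takes ``lam_domain``, whereas the call below matches the
# signature defined directly above.
import numpy as np
import bet.calculateP.calculateP as calcP

samples = np.linspace(0.0, 1.0, 100).reshape(-1, 1)
data = 2.0 * samples
d_distr_samples = np.linspace(0.0, 2.0, 10).reshape(-1, 1)
rho_D_M = np.ones((10,)) / 10.0
lam_domain = np.array([[0.0, 1.0]])
lambda_emulate = calcP.emulate_iid_lebesgue(lam_domain, 1e4)
(P, lam_vol, lambda_emulate, io_ptr, emulate_ptr) = calcP.prob_mc(samples,
        data, rho_D_M, d_distr_samples, lambda_emulate)
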
def generalized_chains(self, input_obj, t_set, kern, savefile,
        initial_sample_type="random", criterion='center', hot_start=0):
    """
    Basic adaptive sampling algorithm using generalized chains.

    .. todo::

        Test HOTSTART from parallel files using different num proc

    :param string initial_sample_type: type of initial sample random (or r),
        latin hypercube(lhs), or space-filling curve(TBD)
    :param input_obj: Either a :class:`bet.sample.sample_set` object for an
        input space, an array of min and max bounds for the input values
        with ``min = input_domain[:, 0]`` and ``max = input_domain[:, 1]``,
        or the dimension of an input space
    :type input_obj: :class:`~bet.sample.sample_set`,
        :class:`numpy.ndarray` of shape (ndim, 2), or :class:`int`
    :param t_set: method for creating new parameter steps using a given step
        size based on the parameter domain size
    :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set`
    :param kern: functional that acts on the data used to determine the
        proposed change to the ``step_size``
    :type kern: :class:`~bet.sampling.adaptiveSampling.kernel` object.
    :param string savefile: filename to save samples and data
    :param int hot_start: Flag whether or not hot start the sampling chains
        from a previous set of chains. Note that ``num_chains`` must be the
        same, but ``num_chains_pproc`` need not be the same. 0 - cold
        start, 1 - hot start from uncompleted run, 2 - hot start from
        finished run
    :param string criterion: latin hypercube criterion see
        `PyDOE <http://pythonhosted.org/pyDOE/randomized.html>`_
    :rtype: tuple
    :returns: (``discretization``, ``all_step_ratios``) where
        ``discretization`` is a :class:`~bet.sample.discretization` object
        containing ``num_samples`` and ``all_step_ratios`` is np.ndarray of
        shape ``(num_chains, chain_length)``

    """
    # Calculate step_size
    max_ratio = t_set.max_ratio
    min_ratio = t_set.min_ratio

    if not hot_start:
        logging.info("COLD START")
        step_ratio = t_set.init_ratio * np.ones(self.num_chains_pproc)
        # Initialize the first batch of N samples (maybe taken from a latin
        # hypercube/space-filling curve to fully explore parameter space -
        # not necessarily random). Call these samples_old.
        disc_old = super(sampler, self).create_random_discretization(
                initial_sample_type, input_obj, savefile,
                self.num_chains, criterion, globalize=False)
        self.num_samples = self.chain_length * self.num_chains
        comm.Barrier()

        # populate local values
        # disc_old._input_sample_set.global_to_local()
        # disc_old._output_sample_set.global_to_local()
        input_old = disc_old._input_sample_set.copy()

        disc = disc_old.copy()
        all_step_ratios = step_ratio

        (kern_old, proposal) = kern.delta_step(
                disc_old._output_sample_set.get_values_local(), None)

        start_ind = 1

    if hot_start:
        # LOAD FILES
        _, disc, all_step_ratios, kern_old = loadmat(savefile,
                lb_model=None, hot_start=hot_start,
                num_chains=self.num_chains)
        # MAKE SURE ARRAYS ARE LOCALIZED FROM HERE ON OUT WILL ONLY
        # OPERATE ON _local_values
        # Set mdat, step_ratio, input_old, start_ind appropriately
        step_ratio = all_step_ratios[-self.num_chains_pproc:]
        input_old = sample.sample_set(disc._input_sample_set.get_dim())
        input_old.set_domain(disc._input_sample_set.get_domain())
        input_old.set_values_local(disc._input_sample_set.
                get_values_local()[-self.num_chains_pproc:, :])
        # Determine how many batches have been run
        start_ind = disc._input_sample_set.get_values_local().\
                shape[0] / self.num_chains_pproc

    mdat = dict()
    self.update_mdict(mdat)
    input_old.update_bounds_local()

    for batch in xrange(start_ind, self.chain_length):
        # For each of N samples_old, create N new parameter samples using
        # transition set and step_ratio. Call these samples input_new.
        input_new = t_set.step(step_ratio, input_old)

        # Solve the model for the input_new.
        output_new_values = self.lb_model(input_new.get_values_local())

        # Make some decision about changing step_size(k). There are
        # multiple ways to do this.
        # Determine step size
        (kern_old, proposal) = kern.delta_step(output_new_values, kern_old)
        step_ratio = proposal * step_ratio
        # Is the ratio greater than max?
        step_ratio[step_ratio > max_ratio] = max_ratio
        # Is the ratio less than min?
        step_ratio[step_ratio < min_ratio] = min_ratio

        # Save and export concatenated arrays
        if self.chain_length < 4:
            pass
        elif comm.rank == 0 and (batch + 1) % (self.chain_length / 4) == 0:
            logging.info("Current chain length: " +
                    str(batch + 1) + "/" + str(self.chain_length))
        disc._input_sample_set.append_values_local(
                input_new.get_values_local())
        disc._output_sample_set.append_values_local(output_new_values)
        all_step_ratios = np.concatenate((all_step_ratios, step_ratio))
        mdat['step_ratios'] = all_step_ratios
        mdat['kern_old'] = kern_old

        super(sampler, self).save(mdat, savefile, disc, globalize=False)
        input_old = input_new

    # collect everything
    disc._input_sample_set.update_bounds_local()
    # disc._input_sample_set.local_to_global()
    # disc._output_sample_set.local_to_global()

    MYall_step_ratios = np.copy(all_step_ratios)
    # ``all_step_ratios`` is np.ndarray of shape (num_chains, chain_length)
    all_step_ratios = util.get_global_values(MYall_step_ratios,
            shape=(self.num_samples,))
    all_step_ratios = np.reshape(all_step_ratios, (self.num_chains,
            self.chain_length), 'F')

    # save everything
    mdat['step_ratios'] = all_step_ratios
    mdat['kern_old'] = util.get_global_values(kern_old,
            shape=(self.num_chains,))
    super(sampler, self).save(mdat, savefile, disc, globalize=True)

    return (disc, all_step_ratios)

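# Hedged sketch for the discretization-based ``generalized_chains`` above,
# using the newer ``bet.sample`` interface: ``input_obj`` is passed as a
# (ndim, 2) bound array as described in the docstring, and ``rho_D`` and
# ``lb_model`` are the toy functions from the earlier adaptive-sampling
# sketch. The constructor argument lists are assumptions to check against
# your copy of ``bet.sampling.adaptiveSampling``.
import numpy as np
import bet.sampling.adaptiveSampling as asam

input_domain = np.array([[0.0, 1.0], [0.0, 1.0]])  # (ndim, 2) bounds
t_set = asam.transition_set(0.5, 0.5 ** 5, 1.0)
kern = asam.rhoD_kernel(1.0, rho_D)
my_sampler = asam.sampler(80, 10, lb_model)
(disc, all_step_ratios) = my_sampler.generalized_chains(input_domain,
        t_set, kern, 'adaptive_disc.mat')
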