def verify_user_samples(model, sampler, samples, savefile, parallel):
    # evaluate the model at the samples directly
    data = model(samples)

    # evaluate the model at the samples
    (my_samples, my_data) = sampler.user_samples(samples, savefile, parallel)

    if len(data.shape) == 1:
        data = np.expand_dims(data, axis=1)
    if len(samples.shape) == 1:
        samples = np.expand_dims(samples, axis=1)

    # compare the samples
    nptest.assert_array_equal(samples, my_samples)
    # compare the data
    nptest.assert_array_equal(data, my_data)
    # did num_samples get updated?
    assert samples.shape[0] == sampler.num_samples
    # did the file get correctly saved?
    if comm.rank == 0:
        mdat = sio.loadmat(savefile)
        nptest.assert_array_equal(samples, mdat['samples'])
        nptest.assert_array_equal(data, mdat['data'])
    comm.Barrier()
def verify_random_samples(model, sampler, sample_type, param_min, param_max,
        num_samples, savefile, parallel):
    # recreate the samples
    if num_samples is None:
        num_samples = sampler.num_samples
    param_left = np.repeat([param_min], num_samples, 0)
    param_right = np.repeat([param_max], num_samples, 0)
    samples = (param_right - param_left)
    if sample_type == "lhs":
        samples = samples * pyDOE.lhs(param_min.shape[-1], num_samples)
    elif sample_type == "random" or sample_type == "r":
        np.random.seed(1)
        samples = samples * np.random.random(param_left.shape)
    samples = samples + param_left
    # evaluate the model at the samples directly
    data = model(samples)

    # evaluate the model at the samples
    # reset the random seed
    if sample_type == "random" or sample_type == "r":
        np.random.seed(1)
    (my_samples, my_data) = sampler.user_samples(samples, savefile, parallel)

    # make sure that the samples are within the boundaries
    assert np.all(my_samples <= param_right)
    assert np.all(my_samples >= param_left)

    if len(data.shape) == 1:
        data = np.expand_dims(data, axis=1)
    if len(samples.shape) == 1:
        samples = np.expand_dims(samples, axis=1)

    # compare the samples
    nptest.assert_array_equal(samples, my_samples)
    # compare the data
    nptest.assert_array_equal(data, my_data)
    # did num_samples get updated?
    assert samples.shape[0] == sampler.num_samples
    assert num_samples == sampler.num_samples
    # did the file get correctly saved?
    if comm.rank == 0:
        mdat = sio.loadmat(savefile)
        nptest.assert_array_equal(samples, mdat['samples'])
        nptest.assert_array_equal(data, mdat['data'])
    comm.Barrier()
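# Illustrative usage of the verification helpers above.  This is a hedged
# sketch only: the real sampler class is not shown in this excerpt, so
# ``mock_sampler`` below is a hypothetical stand-in that implements just the
# pieces the helpers exercise (a ``user_samples`` method, a ``num_samples``
# attribute, and saving 'samples'/'data' to a .mat file).  It assumes the
# test module's existing imports (np, nptest, sio, comm) are available.
import numpy as np
import scipy.io as sio


def linear_model(samples):
    # simple test model: sum the parameter values of each sample
    return np.sum(samples, axis=1)


class mock_sampler(object):
    def __init__(self, model, num_samples):
        self.lb_model = model
        self.num_samples = num_samples

    def user_samples(self, samples, savefile, parallel=False):
        data = self.lb_model(samples)
        if len(data.shape) == 1:
            data = np.expand_dims(data, axis=1)
        self.num_samples = samples.shape[0]
        sio.savemat(savefile, {'samples': samples, 'data': data})
        return (samples, data)


my_samples = np.random.uniform(size=(20, 2))
verify_user_samples(linear_model, mock_sampler(linear_model, 20),
    my_samples, 'mock_savefile.mat', parallel=False)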
def find_good_sets(grad_tensor, good_sets_prev, unique_indices,
        num_optsets_return, cond_tol, volume):
    r"""
    #TODO: Use the idea that we only know vectors to within 10% accuracy to
    #    guide inner_prod_tol and condnum_tol.

    Given gradient vectors at each center in the parameter space and given
    good sets of size n - 1, return good sets of size n.  That is, return
    sets of size n that have average condition number less than some
    tolerance.

    :param grad_tensor: Gradient vectors at each center in the parameter
        space :math:`\Lambda` for each QoI map.
    :type grad_tensor: :class:`np.ndarray` of shape (num_centers, num_qois,
        Ldim) where num_centers is the number of points in :math:`\Lambda`
        at which we have approximated the gradient vectors, num_qois is the
        total number of possible QoIs to choose from, and Ldim is the
        dimension of :math:`\Lambda`.
    :param good_sets_prev: Good sets of QoIs of size n - 1.
    :type good_sets_prev: :class:`np.ndarray` of size (num_good_sets_prev,
        n - 1)
    :param unique_indices: Unique QoIs to consider.
    :type unique_indices: :class:`np.ndarray` of size (num_unique_qois, 1)
    :param int num_optsets_return: Number of best sets to return
    :param float cond_tol: Throw out all sets of QoIs with average condition
        number greater than this.
    :param boolean volume: If volume is True, use ``calculate_avg_volume``
        to determine optimal QoIs

    :rtype: tuple
    :returns: (good_sets, best_sets, optsingvals_tensor) where good_sets has
        size (num_good_sets, n), best_sets has size (num_optsets_return,
        n + 1), and optsingvals_tensor has size (num_centers, n, Lambda_dim)
    """
    num_centers = grad_tensor.shape[0]
    Lambda_dim = grad_tensor.shape[2]
    num_qois_return = good_sets_prev.shape[1] + 1
    comm.Barrier()

    # Initialize best sets and set all condition numbers large
    best_sets = np.zeros([num_optsets_return, num_qois_return + 1])
    best_sets[:, 0] = np.inf
    good_sets = np.zeros([1, num_qois_return])
    count_qois = 0
    optsingvals_tensor = np.zeros(
        [num_centers, num_qois_return, num_optsets_return])

    # For each good set of size n - 1, find the possible sets of size n and
    # compute the average condition number of each
    count_qois = 0
    for i in range(good_sets_prev.shape[0]):
        min_ind = np.max(good_sets_prev[i, :])
        # Find all possible combinations of QoIs that include this set of
        # n - 1
        if comm.rank == 0:
            inds_notin_set = util.fix_dimensions_vector_2darray(list(set(
                unique_indices) - set(good_sets_prev[i, :])))

            # Choose only the QoI indices > min_ind so we do not repeat sets
            inds_notin_set = util.fix_dimensions_vector_2darray(
                inds_notin_set[inds_notin_set > min_ind])
            qoi_combs = util.fix_dimensions_vector_2darray(np.append(np.tile(
                good_sets_prev[i, :], [inds_notin_set.shape[0], 1]),
                inds_notin_set, axis=1))
            qoi_combs = np.array_split(qoi_combs, comm.size)
        else:
            qoi_combs = None

        # Scatter them throughout the processors
        qoi_combs = comm.scatter(qoi_combs, root=0)

        # For each combination, compute the average condition number and add
        # the set to good_sets if it is less than cond_tol
        for qoi_set in range(len(qoi_combs)):
            count_qois += 1
            curr_set = util.fix_dimensions_vector_2darray(
                qoi_combs[qoi_set]).transpose()
            if volume == False:
                (current_condnum, singvals) = calculate_avg_condnum(
                    grad_tensor, qoi_combs[qoi_set])
            else:
                (current_condnum, singvals) = calculate_avg_volume(
                    grad_tensor, qoi_combs[qoi_set])

            # If it is a good set, add it to good_sets
            if current_condnum < cond_tol:
                good_sets = np.append(good_sets, curr_set, axis=0)

                # If the average condition number is less than the max
                # condition number in our best_sets, add it to best_sets
                if current_condnum < best_sets[-1, 0]:
                    best_sets[-1, :] = np.append(
                        np.array([current_condnum]), qoi_combs[qoi_set])
                    order = best_sets[:, 0].argsort()
                    best_sets = best_sets[order]

                    # Store the corresponding singular values
                    optsingvals_tensor[:, :, -1] = singvals
                    optsingvals_tensor = optsingvals_tensor[:, :, order]

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and condition numbers from each processor
    good_sets = comm.gather(good_sets, root=0)
    best_sets = np.array(comm.gather(best_sets, root=0))
    count_qois = np.array(comm.gather(count_qois, root=0))

    # Find the num_optsets_return smallest condition numbers from all
    # processors
    if comm.rank == 0:
        # Organize the best sets
        best_sets = best_sets.reshape(num_optsets_return * comm.size,
            num_qois_return + 1)
        [temp, uniq_inds_best] = np.unique(best_sets[:, 0],
            return_index=True)
        best_sets = best_sets[uniq_inds_best, :]
        best_sets = best_sets[best_sets[:, 0].argsort()]
        best_sets = best_sets[:num_optsets_return, :]

        # Organize the good sets
        good_sets_new = np.zeros([1, num_qois_return])
        for each in good_sets:
            good_sets_new = np.append(good_sets_new, each[1:], axis=0)
        good_sets = good_sets_new

        print 'Possible sets of QoIs of size %i : ' % good_sets.shape[1], \
            np.sum(count_qois)
        print 'Good sets of QoIs of size %i : ' % good_sets.shape[1], \
            good_sets.shape[0] - 1

    comm.Barrier()
    best_sets = comm.bcast(best_sets, root=0)
    good_sets = comm.bcast(good_sets, root=0)

    return (good_sets[1:].astype(int), best_sets, optsingvals_tensor)
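# Standalone numpy illustration of the running "best k" update used inside
# find_good_sets above (and in chooseOptQoIs_verbose below): best_sets keeps
# its rows sorted by column 0 (the average condition number); a new candidate
# overwrites the worst row and the rows are re-sorted.  The condition numbers
# and QoI pairs here are made up purely for illustration.
import numpy as np

num_optsets_return = 3
best_sets = np.zeros([num_optsets_return, 3])
best_sets[:, 0] = np.inf

for condnum, qoi_pair in [(5.0, (0, 3)), (2.0, (1, 4)), (9.0, (2, 5)),
                          (1.5, (0, 4))]:
    if condnum < best_sets[-1, 0]:
        best_sets[-1, :] = np.append(np.array([condnum]), qoi_pair)
        best_sets = best_sets[best_sets[:, 0].argsort()]

print(best_sets)
# [[ 1.5  0.   4. ]
#  [ 2.   1.   4. ]
#  [ 5.   0.   3. ]]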
def chooseOptQoIs_verbose(grad_tensor, qoiIndices=None, num_qois_return=None,
        num_optsets_return=None, inner_prod_tol=1.0, volume=False,
        remove_zeros=True):
    r"""
    Given gradient vectors at some points (centers) in the parameter space, a
    set of QoIs to choose from, and the number of desired QoIs to return,
    this method returns the ``num_optsets_return`` best sets of QoIs with
    respect to either the average condition number of the matrix formed by
    the gradient vectors of each QoI map, or the average volume of the
    inverse problem using this set of QoIs, computed as the product of the
    singular values of the same matrix.  This method is brute force, i.e., if
    the method is given 10,000 QoIs and told to return the N best sets of 3,
    it will check all 10,000 choose 3 possible sets.  See chooseOptQoIs_large
    for a less computationally expensive approach.

    :param grad_tensor: Gradient vectors at each point of interest in the
        parameter space :math:`\Lambda` for each QoI map.
    :type grad_tensor: :class:`np.ndarray` of shape (num_centers, num_qois,
        Lambda_dim) where num_centers is the number of points in
        :math:`\Lambda` at which we have approximated the gradient vectors
        and num_qois is the total number of possible QoIs to choose from
    :param qoiIndices: Set of QoIs to consider from grad_tensor.  Default is
        range(0, grad_tensor.shape[1])
    :type qoiIndices: :class:`np.ndarray` of size (1, num QoIs to consider)
    :param int num_qois_return: Number of desired QoIs to use in the inverse
        problem.  Default is Lambda_dim
    :param int num_optsets_return: Number of best sets to return.  Default
        is 10
    :param boolean volume: If volume is True, use ``calculate_avg_volume``
        to determine optimal QoIs
    :param boolean remove_zeros: If True, ``find_unique_vecs`` will remove
        any QoIs that have a zero gradient vector at at least one point in
        :math:`\Lambda`.

    :rtype: tuple
    :returns: (condnum_indices_mat, optsingvals) where condnum_indices_mat
        has shape (num_optsets_return, num_qois_return + 1) and optsingvals
        has shape (num_centers, num_qois_return, num_optsets_return)
    """
    num_centers = grad_tensor.shape[0]
    Lambda_dim = grad_tensor.shape[2]
    if qoiIndices is None:
        qoiIndices = range(0, grad_tensor.shape[1])
    if num_qois_return is None:
        num_qois_return = Lambda_dim
    if num_optsets_return is None:
        num_optsets_return = 10

    qoiIndices = find_unique_vecs(grad_tensor, inner_prod_tol, qoiIndices,
        remove_zeros)

    # Find all possible combinations of QoIs
    if comm.rank == 0:
        qoi_combs = np.array(
            list(combinations(list(qoiIndices), num_qois_return)))
        print 'Possible sets of QoIs : ', qoi_combs.shape[0]
        qoi_combs = np.array_split(qoi_combs, comm.size)
    else:
        qoi_combs = None

    # Scatter them throughout the processors
    qoi_combs = comm.scatter(qoi_combs, root=0)

    # For each combination, check the skewness and keep the sets
    # that have the best skewness, i.e., smallest condition number
    condnum_indices_mat = np.zeros([num_optsets_return, num_qois_return + 1])
    condnum_indices_mat[:, 0] = np.inf
    optsingvals_tensor = np.zeros(
        [num_centers, num_qois_return, num_optsets_return])
    for qoi_set in range(len(qoi_combs)):
        if volume == False:
            (current_condnum, singvals) = calculate_avg_condnum(grad_tensor,
                qoi_combs[qoi_set])
        else:
            (current_condnum, singvals) = calculate_avg_volume(grad_tensor,
                qoi_combs[qoi_set])

        if current_condnum < condnum_indices_mat[-1, 0]:
            condnum_indices_mat[-1, :] = np.append(
                np.array([current_condnum]), qoi_combs[qoi_set])
            order = condnum_indices_mat[:, 0].argsort()
            condnum_indices_mat = condnum_indices_mat[order]

            optsingvals_tensor[:, :, -1] = singvals
            optsingvals_tensor = optsingvals_tensor[:, :, order]

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and condition numbers from each processor
    condnum_indices_mat = np.array(comm.gather(condnum_indices_mat, root=0))
    optsingvals_tensor = np.array(comm.gather(optsingvals_tensor, root=0))

    # Find the num_optsets_return smallest condition numbers from all
    # processors
    if comm.rank == 0:
        condnum_indices_mat = condnum_indices_mat.reshape(
            num_optsets_return * comm.size, num_qois_return + 1)
        optsingvals_tensor = optsingvals_tensor.reshape(
            num_centers, num_qois_return, num_optsets_return * comm.size)
        order = condnum_indices_mat[:, 0].argsort()

        condnum_indices_mat = condnum_indices_mat[order]
        condnum_indices_mat = condnum_indices_mat[:num_optsets_return, :]

        optsingvals_tensor = optsingvals_tensor[:, :, order]
        optsingvals_tensor = optsingvals_tensor[:, :, :num_optsets_return]

    condnum_indices_mat = comm.bcast(condnum_indices_mat, root=0)
    optsingvals_tensor = comm.bcast(optsingvals_tensor, root=0)

    return (condnum_indices_mat, optsingvals_tensor)
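# Hedged usage sketch for chooseOptQoIs_verbose above.  The gradient tensor
# here is synthetic random data purely for illustration; in practice
# grad_tensor comes from a gradient-approximation routine and has shape
# (num_centers, num_qois, Lambda_dim) as documented.  Assumes the module's
# own imports and helpers (np, comm, find_unique_vecs, calculate_avg_condnum)
# are available.
import numpy as np

np.random.seed(0)
num_centers, num_qois, Lambda_dim = 10, 20, 3
grad_tensor = np.random.normal(size=(num_centers, num_qois, Lambda_dim))

(condnum_indices_mat, optsingvals) = chooseOptQoIs_verbose(grad_tensor,
    num_qois_return=Lambda_dim, num_optsets_return=5)
# Row i of condnum_indices_mat is [avg condition number, QoI indices ...]
print(condnum_indices_mat[0])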
def find_good_sets(input_set, good_sets_prev, unique_indices,
        num_optsets_return, measskew_tol, measure):
    r"""
    .. todo::

        Use the idea that we only know vectors to within 10% accuracy to
        guide inner_prod_tol and skewness_tol.

    Given gradient vectors at each center in the parameter space and given
    good sets of size (n - 1), return good sets of size n.  That is, return
    sets of size n that have average measure (skewness) less than some
    tolerance.

    :param input_set: The input sample set.  Make sure the attribute
        _jacobians is not None.
    :type input_set: :class:`~bet.sample.sample_set`
    :param good_sets_prev: Good sets of QoIs of size n - 1.
    :type good_sets_prev: :class:`np.ndarray` of size (num_good_sets_prev,
        n - 1)
    :param unique_indices: Unique QoIs to consider.
    :type unique_indices: :class:`np.ndarray` of size (num_unique_qois, 1)
    :param int num_optsets_return: Number of best sets to return
    :param float measskew_tol: Throw out all sets of QoIs with average
        measure (skewness) greater than this.
    :param boolean measure: If measure is True, use ``calculate_avg_measure``
        to determine optimal QoIs, else use ``calculate_avg_skewness``

    :rtype: tuple
    :returns: (good_sets, best_sets, optsingvals_tensor) where good_sets has
        size (num_good_sets, n), best_sets has size (num_optsets_return,
        n + 1), and optsingvals_tensor has size (num_centers, n, input_dim)
    """
    if input_set._jacobians is None:
        raise ValueError("You must have jacobians to use this method.")
    num_centers = input_set._jacobians.shape[0]
    num_qois_return = good_sets_prev.shape[1] + 1
    comm.Barrier()

    # Initialize best sets and set all skewness values large
    best_sets = np.zeros([num_optsets_return, num_qois_return + 1])
    best_sets[:, 0] = np.inf
    good_sets = np.zeros([1, num_qois_return])
    count_qois = 0
    optsingvals_tensor = np.zeros(
        [num_centers, num_qois_return, num_optsets_return])

    # For each good set of size (n - 1), find the possible sets of size n
    # and compute the average skewness of each
    count_qois = 0
    for i in xrange(good_sets_prev.shape[0]):
        min_ind = np.max(good_sets_prev[i, :])
        # Find all possible combinations of QoIs that include this set of
        # (n - 1)
        if comm.rank == 0:
            inds_notin_set = util.fix_dimensions_vector_2darray(list(set(
                unique_indices) - set(good_sets_prev[i, :])))

            # Choose only the QoI indices > min_ind so we do not repeat sets
            inds_notin_set = util.fix_dimensions_vector_2darray(
                inds_notin_set[inds_notin_set > min_ind])
            qoi_combs = util.fix_dimensions_vector_2darray(np.append(np.tile(
                good_sets_prev[i, :], [inds_notin_set.shape[0], 1]),
                inds_notin_set, axis=1))
            qoi_combs = np.array_split(qoi_combs, comm.size)
        else:
            qoi_combs = None

        # Scatter them throughout the processors
        qoi_combs = comm.scatter(qoi_combs, root=0)

        # For each combination, compute the average measure (skewness) and
        # add the set to good_sets if it is less than measskew_tol
        for qoi_set in xrange(len(qoi_combs)):
            count_qois += 1
            curr_set = util.fix_dimensions_vector_2darray(
                qoi_combs[qoi_set]).transpose()
            if measure is False:
                (current_measskew, singvals) = calculate_avg_skewness(
                    input_set, qoi_combs[qoi_set])
            else:
                (current_measskew, singvals) = calculate_avg_measure(
                    input_set, qoi_combs[qoi_set])

            # If it is a good set, add it to good_sets
            if current_measskew < measskew_tol:
                good_sets = np.append(good_sets, curr_set, axis=0)

                # If the average skewness is less than the max skewness
                # in our best_sets, add it to best_sets
                if current_measskew < best_sets[-1, 0]:
                    best_sets[-1, :] = np.append(
                        np.array([current_measskew]), qoi_combs[qoi_set])
                    order = best_sets[:, 0].argsort()
                    best_sets = best_sets[order]

                    # Store the corresponding singular values
                    optsingvals_tensor[:, :, -1] = singvals
                    optsingvals_tensor = optsingvals_tensor[:, :, order]

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and skewness values from each processor
    good_sets = comm.gather(good_sets, root=0)
    best_sets = np.array(comm.gather(best_sets, root=0))
    count_qois = np.array(comm.gather(count_qois, root=0))

    # Find the num_optsets_return smallest skewness values from all
    # processors
    if comm.rank == 0:
        # Organize the best sets
        best_sets = best_sets.reshape(num_optsets_return * comm.size,
            num_qois_return + 1)
        [_, uniq_inds_best] = np.unique(best_sets[:, 0], return_index=True)
        best_sets = best_sets[uniq_inds_best, :]
        best_sets = best_sets[best_sets[:, 0].argsort()]
        best_sets = best_sets[:num_optsets_return, :]

        # Organize the good sets
        good_sets_new = np.zeros([1, num_qois_return])
        for each in good_sets:
            good_sets_new = np.append(good_sets_new, each[1:], axis=0)
        good_sets = good_sets_new

        logging.info('Possible sets of QoIs of size {} : {}'.format(
            good_sets.shape[1], np.sum(count_qois)))
        logging.info('Good sets of QoIs of size {} : {}'.format(
            good_sets.shape[1], good_sets.shape[0] - 1))

    comm.Barrier()
    best_sets = comm.bcast(best_sets, root=0)
    good_sets = comm.bcast(good_sets, root=0)

    return (good_sets[1:].astype(int), best_sets, optsingvals_tensor)
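# Minimal sketch of the input find_good_sets expects: a sample_set whose
# _jacobians attribute holds gradient vectors of shape (num_centers,
# num_qois, input_dim).  The synthetic jacobians below are illustration
# only; in practice they come from a gradient-approximation step, and the
# sample_set class may provide a setter rather than direct assignment to the
# private attribute.
import numpy as np
import bet.sample as sample

input_dim, num_centers, num_qois = 2, 10, 6
input_set = sample.sample_set(input_dim)
input_set._jacobians = np.random.normal(
    size=(num_centers, num_qois, input_dim))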
def chooseOptQoIs_verbose(input_set, qoiIndices=None, num_qois_return=None,
        num_optsets_return=None, inner_prod_tol=1.0, measure=False,
        remove_zeros=True):
    r"""
    Given gradient vectors at some points (centers) in the parameter space, a
    set of QoIs to choose from, and the number of desired QoIs to return,
    this method returns the ``num_optsets_return`` best sets of QoIs with
    respect to either the average measure of the matrix formed by the
    gradient vectors of each QoI map, or the average skewness of the inverse
    image of this set of QoIs, computed as the product of the singular values
    of the same matrix.  This method is brute force, i.e., if the method is
    given 10,000 QoIs and told to return the N best sets of 3, it will check
    all 10,000 choose 3 possible sets.  See chooseOptQoIs_large for a less
    computationally expensive approach.

    :param input_set: The input sample set.  Make sure the attribute
        _jacobians is not None
    :type input_set: :class:`~bet.sample.sample_set`
    :param qoiIndices: Set of QoIs to consider.  Default is
        xrange(0, input_set._jacobians.shape[1])
    :type qoiIndices: :class:`np.ndarray` of size (1, num QoIs to consider)
    :param int num_qois_return: Number of desired QoIs to use in the inverse
        problem.  Default is input_dim
    :param int num_optsets_return: Number of best sets to return.  Default
        is 10
    :param boolean measure: If measure is True, use ``calculate_avg_measure``
        to determine optimal QoIs, else use ``calculate_avg_skewness``
    :param boolean remove_zeros: If True, ``find_unique_vecs`` will remove
        any QoIs that have a zero gradient

    :rtype: tuple
    :returns: (measure_skewness_indices_mat, optsingvals_tensor) where
        measure_skewness_indices_mat has shape (num_optsets_return,
        num_qois_return + 1) and optsingvals_tensor has shape (num_centers,
        num_qois_return, num_optsets_return)
    """
    G = input_set._jacobians
    if G is None:
        raise ValueError("You must have jacobians to use this method.")
    input_dim = input_set._dim
    num_centers = G.shape[0]

    if qoiIndices is None:
        qoiIndices = xrange(0, G.shape[1])
    if num_qois_return is None:
        num_qois_return = input_dim
    if num_optsets_return is None:
        num_optsets_return = 10

    # Remove QoIs that have zero gradients at any of the centers
    qoiIndices = find_unique_vecs(input_set, inner_prod_tol, qoiIndices,
        remove_zeros)

    # Find all possible combinations of QoIs
    if comm.rank == 0:
        qoi_combs = np.array(
            list(combinations(list(qoiIndices), num_qois_return)))
        logging.info('Possible sets of QoIs : {}'.format(qoi_combs.shape[0]))
        qoi_combs = np.array_split(qoi_combs, comm.size)
    else:
        qoi_combs = None

    # Scatter them throughout the processors
    qoi_combs = comm.scatter(qoi_combs, root=0)

    # For each combination, check the skewness and keep the sets
    # that have the smallest skewness
    measure_skewness_indices_mat = np.zeros(
        [num_optsets_return, num_qois_return + 1])
    measure_skewness_indices_mat[:, 0] = np.inf
    optsingvals_tensor = np.zeros(
        [num_centers, num_qois_return, num_optsets_return])
    for qoi_set in xrange(len(qoi_combs)):
        if measure == False:
            (current_measskew, singvals) = calculate_avg_skewness(input_set,
                qoi_combs[qoi_set])
        else:
            (current_measskew, singvals) = calculate_avg_measure(input_set,
                qoi_combs[qoi_set])

        if current_measskew < measure_skewness_indices_mat[-1, 0]:
            measure_skewness_indices_mat[-1, :] = np.append(
                np.array([current_measskew]), qoi_combs[qoi_set])
            order = measure_skewness_indices_mat[:, 0].argsort()
            measure_skewness_indices_mat = measure_skewness_indices_mat[order]

            optsingvals_tensor[:, :, -1] = singvals
            optsingvals_tensor = optsingvals_tensor[:, :, order]

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and skewness values from each processor
    measure_skewness_indices_mat = np.array(comm.gather(
        measure_skewness_indices_mat, root=0))
    optsingvals_tensor = np.array(comm.gather(optsingvals_tensor, root=0))

    # Find the num_optsets_return smallest skewness values from all
    # processors
    if comm.rank == 0:
        measure_skewness_indices_mat = measure_skewness_indices_mat.reshape(
            num_optsets_return * comm.size, num_qois_return + 1)
        optsingvals_tensor = optsingvals_tensor.reshape(
            num_centers, num_qois_return, num_optsets_return * comm.size)
        order = measure_skewness_indices_mat[:, 0].argsort()

        measure_skewness_indices_mat = measure_skewness_indices_mat[order]
        measure_skewness_indices_mat = measure_skewness_indices_mat[
            :num_optsets_return, :]

        optsingvals_tensor = optsingvals_tensor[:, :, order]
        optsingvals_tensor = optsingvals_tensor[:, :, :num_optsets_return]

    measure_skewness_indices_mat = comm.bcast(measure_skewness_indices_mat,
        root=0)
    optsingvals_tensor = comm.bcast(optsingvals_tensor, root=0)

    return (measure_skewness_indices_mat, optsingvals_tensor)
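# Standalone mpi4py illustration of the gather/sort/broadcast reduction used
# at the end of the routine above: each rank holds its k best rows sorted by
# column 0, rank 0 merges them, keeps the global k best, and broadcasts the
# result to everyone.  The random rows and the local ``comm`` here are
# illustration only (the library uses its own comm wrapper).
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
k = 3
local_best = np.random.uniform(size=(k, 2))
local_best = local_best[local_best[:, 0].argsort()]   # sorted by column 0

all_best = comm.gather(local_best, root=0)
if comm.rank == 0:
    all_best = np.array(all_best).reshape(k * comm.size, 2)
    all_best = all_best[all_best[:, 0].argsort()][:k, :]
best = comm.bcast(all_best, root=0)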
def generalized_chains(self, param_min, param_max, t_set, kern, savefile,
        initial_sample_type="lhs", criterion='center'):
    """
    Basic adaptive sampling algorithm using generalized chains.

    :param string initial_sample_type: type of initial sample random (or r),
        latin hypercube (lhs), or space-filling curve (TBD)
    :param param_min: minimum value for each parameter dimension
    :type param_min: :class:`numpy.ndarray` (ndim,)
    :param param_max: maximum value for each parameter dimension
    :type param_max: :class:`numpy.ndarray` (ndim,)
    :param t_set: method for creating new parameter steps using a given step
        size based on the parameter domain size
    :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set`
    :param kern: functional that acts on the data used to determine the
        proposed change to the ``step_size``
    :type kern: :class:`~bet.sampling.adaptiveSampling.kernel` object.
    :param string savefile: filename to save samples and data
    :param string criterion: latin hypercube criterion, see
        `PyDOE <http://pythonhosted.org/pyDOE/randomized.html>`_

    :rtype: tuple
    :returns: (``parameter_samples``, ``data_samples``, ``all_step_ratios``)
        where ``parameter_samples`` is np.ndarray of shape (num_samples,
        ndim), ``data_samples`` is np.ndarray of shape (num_samples, mdim),
        and ``all_step_ratios`` is np.ndarray of shape (num_chains,
        chain_length)
    """
    if comm.size > 1:
        psavefile = os.path.join(os.path.dirname(savefile),
            "proc{}{}".format(comm.rank, os.path.basename(savefile)))

    # Initialize Nx1 vector Step_size = something reasonable (based on size
    # of domain and transition set type)
    # Calculate domain size
    param_left = np.repeat([param_min], self.num_chains_pproc, 0)
    param_right = np.repeat([param_max], self.num_chains_pproc, 0)
    param_width = param_right - param_left
    # Calculate step_size
    max_ratio = t_set.max_ratio
    min_ratio = t_set.min_ratio
    step_ratio = t_set.init_ratio * np.ones(self.num_chains_pproc)

    # Initialize the first batch of N samples (maybe taken from latin
    # hypercube/space-filling curve to fully explore parameter space - not
    # necessarily random).  Call these samples_old.
    (samples_old, data_old) = super(sampler, self).random_samples(
        initial_sample_type, param_min, param_max, savefile,
        self.num_chains, criterion)
    self.num_samples = self.chain_length * self.num_chains
    comm.Barrier()

    # now split it all up
    if comm.size > 1:
        MYsamples_old = np.empty((np.shape(samples_old)[0] / comm.size,
            np.shape(samples_old)[1]))
        comm.Scatter([samples_old, MPI.DOUBLE], [MYsamples_old, MPI.DOUBLE])
        MYdata_old = np.empty((np.shape(data_old)[0] / comm.size,
            np.shape(data_old)[1]))
        comm.Scatter([data_old, MPI.DOUBLE], [MYdata_old, MPI.DOUBLE])
    else:
        MYsamples_old = np.copy(samples_old)
        MYdata_old = np.copy(data_old)

    samples = MYsamples_old
    data = MYdata_old
    all_step_ratios = step_ratio
    (kern_old, proposal) = kern.delta_step(MYdata_old, None)
    mdat = dict()
    self.update_mdict(mdat)

    for batch in xrange(1, self.chain_length):
        # For each of N samples_old, create N new parameter samples using
        # the transition set and step_ratio.  Call these samples_new.
        samples_new = t_set.step(step_ratio, param_width, param_left,
            param_right, MYsamples_old)

        # Solve the model for the samples_new.
        data_new = self.lb_model(samples_new)

        # Make some decision about changing step_size(k).  There are
        # multiple ways to do this.
        # Determine step size
        (kern_old, proposal) = kern.delta_step(data_new, kern_old)
        step_ratio = proposal * step_ratio
        # Is the ratio greater than max?
        step_ratio[step_ratio > max_ratio] = max_ratio
        # Is the ratio less than min?
        step_ratio[step_ratio < min_ratio] = min_ratio

        # Save and export concatenated arrays
        if self.chain_length < 4:
            pass
        elif (batch + 1) % (self.chain_length / 4) == 0:
            print "Current chain length: " + str(batch + 1) + "/" + \
                str(self.chain_length)
        samples = np.concatenate((samples, samples_new))
        data = np.concatenate((data, data_new))
        all_step_ratios = np.concatenate((all_step_ratios, step_ratio))
        mdat['step_ratios'] = all_step_ratios
        mdat['samples'] = samples
        mdat['data'] = data
        if comm.size > 1:
            super(sampler, self).save(mdat, psavefile)
        else:
            super(sampler, self).save(mdat, savefile)
        MYsamples_old = samples_new

    # collect everything
    MYsamples = np.copy(samples)
    MYdata = np.copy(data)
    MYall_step_ratios = np.copy(all_step_ratios)
    # ``parameter_samples`` is np.ndarray of shape (num_samples, ndim)
    samples = util.get_global_values(MYsamples,
        shape=(self.num_samples, np.shape(MYsamples)[1]))
    # and ``data_samples`` is np.ndarray of shape (num_samples, mdim)
    data = util.get_global_values(MYdata,
        shape=(self.num_samples, np.shape(MYdata)[1]))
    # ``all_step_ratios`` is np.ndarray of shape (num_chains, chain_length)
    all_step_ratios = util.get_global_values(MYall_step_ratios,
        shape=(self.num_samples,))
    all_step_ratios = np.reshape(all_step_ratios,
        (self.num_chains, self.chain_length))

    # save everything
    mdat['step_ratios'] = all_step_ratios
    mdat['samples'] = samples
    mdat['data'] = data
    super(sampler, self).save(mdat, savefile)

    return (samples, data, all_step_ratios)
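# Illustrative kernel stand-in showing the interface generalized_chains
# relies on: kern.delta_step(data_new, kern_old) must return
# (kern_new, proposal), where proposal is a per-chain multiplier applied to
# step_ratio before it is clamped to [min_ratio, max_ratio].  This is a
# hypothetical example, not one of the library's kernel classes.
import numpy as np


class constant_kernel(object):
    """Always proposes the same step-ratio multiplier."""

    def __init__(self, factor=1.0):
        self.factor = factor

    def delta_step(self, data_new, kern_old=None):
        # one proposal value per chain on this processor
        proposal = self.factor * np.ones(data_new.shape[0])
        # carry the new data forward to serve as kern_old next batch
        return (data_new, proposal)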
def generalized_chains(self, input_obj, t_set, kern, savefile,
        initial_sample_type="random", criterion='center', hot_start=0):
    """
    Basic adaptive sampling algorithm using generalized chains.

    .. todo::

        Test HOTSTART from parallel files using different num proc

    :param string initial_sample_type: type of initial sample random (or r),
        latin hypercube (lhs), or space-filling curve (TBD)
    :param input_obj: Either a :class:`bet.sample.sample_set` object for an
        input space, an array of min and max bounds for the input values with
        ``min = input_domain[:, 0]`` and ``max = input_domain[:, 1]``, or the
        dimension of an input space
    :type input_obj: :class:`~bet.sample.sample_set`,
        :class:`numpy.ndarray` of shape (ndim, 2), or :class:`int`
    :param t_set: method for creating new parameter steps using a given step
        size based on the parameter domain size
    :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set`
    :param kern: functional that acts on the data used to determine the
        proposed change to the ``step_size``
    :type kern: :class:`~bet.sampling.adaptiveSampling.kernel` object.
    :param string savefile: filename to save samples and data
    :param int hot_start: Flag whether or not to hot start the sampling
        chains from a previous set of chains.  Note that ``num_chains`` must
        be the same, but ``num_chains_pproc`` need not be the same.
        0 - cold start, 1 - hot start from uncompleted run, 2 - hot start
        from finished run
    :param string criterion: latin hypercube criterion, see
        `PyDOE <http://pythonhosted.org/pyDOE/randomized.html>`_

    :rtype: tuple
    :returns: (``discretization``, ``all_step_ratios``) where
        ``discretization`` is a :class:`~bet.sample.discretization` object
        containing ``num_samples`` and ``all_step_ratios`` is np.ndarray of
        shape ``(num_chains, chain_length)``
    """
    # Calculate step_size
    max_ratio = t_set.max_ratio
    min_ratio = t_set.min_ratio

    if not hot_start:
        logging.info("COLD START")
        step_ratio = t_set.init_ratio * np.ones(self.num_chains_pproc)

        # Initialize the first batch of N samples (maybe taken from latin
        # hypercube/space-filling curve to fully explore parameter space -
        # not necessarily random).  Call these samples_old.
        disc_old = super(sampler, self).create_random_discretization(
            initial_sample_type, input_obj, savefile, self.num_chains,
            criterion, globalize=False)
        self.num_samples = self.chain_length * self.num_chains
        comm.Barrier()

        # populate local values
        #disc_old._input_sample_set.global_to_local()
        #disc_old._output_sample_set.global_to_local()
        input_old = disc_old._input_sample_set.copy()

        disc = disc_old.copy()
        all_step_ratios = step_ratio

        (kern_old, proposal) = kern.delta_step(
            disc_old._output_sample_set.get_values_local(), None)

        start_ind = 1

    if hot_start:
        # LOAD FILES
        _, disc, all_step_ratios, kern_old = loadmat(savefile,
            lb_model=None, hot_start=hot_start, num_chains=self.num_chains)
        # MAKE SURE ARRAYS ARE LOCALIZED; FROM HERE ON OUT WE WILL ONLY
        # OPERATE ON _local_values
        # Set mdat, step_ratio, input_old, start_ind appropriately
        step_ratio = all_step_ratios[-self.num_chains_pproc:]
        input_old = sample.sample_set(disc._input_sample_set.get_dim())
        input_old.set_domain(disc._input_sample_set.get_domain())
        input_old.set_values_local(disc._input_sample_set.
            get_values_local()[-self.num_chains_pproc:, :])

        # Determine how many batches have been run
        start_ind = disc._input_sample_set.get_values_local().\
            shape[0] / self.num_chains_pproc

    mdat = dict()
    self.update_mdict(mdat)
    input_old.update_bounds_local()

    for batch in xrange(start_ind, self.chain_length):
        # For each of N samples_old, create N new parameter samples using
        # the transition set and step_ratio.  Call these samples input_new.
        input_new = t_set.step(step_ratio, input_old)

        # Solve the model for the input_new.
        output_new_values = self.lb_model(input_new.get_values_local())

        # Make some decision about changing step_size(k).  There are
        # multiple ways to do this.
        # Determine step size
        (kern_old, proposal) = kern.delta_step(output_new_values, kern_old)
        step_ratio = proposal * step_ratio
        # Is the ratio greater than max?
        step_ratio[step_ratio > max_ratio] = max_ratio
        # Is the ratio less than min?
        step_ratio[step_ratio < min_ratio] = min_ratio

        # Save and export concatenated arrays
        if self.chain_length < 4:
            pass
        elif comm.rank == 0 and (batch + 1) % (self.chain_length / 4) == 0:
            logging.info("Current chain length: " +
                str(batch + 1) + "/" + str(self.chain_length))
        disc._input_sample_set.append_values_local(
            input_new.get_values_local())
        disc._output_sample_set.append_values_local(output_new_values)
        all_step_ratios = np.concatenate((all_step_ratios, step_ratio))
        mdat['step_ratios'] = all_step_ratios
        mdat['kern_old'] = kern_old

        super(sampler, self).save(mdat, savefile, disc, globalize=False)
        input_old = input_new

    # collect everything
    disc._input_sample_set.update_bounds_local()
    #disc._input_sample_set.local_to_global()
    #disc._output_sample_set.local_to_global()

    MYall_step_ratios = np.copy(all_step_ratios)
    # ``all_step_ratios`` is np.ndarray of shape (num_chains, chain_length)
    all_step_ratios = util.get_global_values(MYall_step_ratios,
        shape=(self.num_samples,))
    all_step_ratios = np.reshape(all_step_ratios,
        (self.num_chains, self.chain_length), 'F')

    # save everything
    mdat['step_ratios'] = all_step_ratios
    mdat['kern_old'] = util.get_global_values(kern_old,
        shape=(self.num_chains,))

    super(sampler, self).save(mdat, savefile, disc, globalize=True)

    return (disc, all_step_ratios)
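# The three accepted forms of input_obj described in the docstring above,
# shown as a hedged sketch (the values are arbitrary; construction of the
# sampler, transition set, and kernel is omitted because their constructors
# are not part of this excerpt).
import numpy as np
import bet.sample as sample

input_dim = 3
# 1) just the dimension of the input space
input_obj_as_dim = input_dim
# 2) min/max bounds, with min = input_domain[:, 0] and max = input_domain[:, 1]
input_obj_as_domain = np.array([[0.0, 1.0],
                                [0.0, 1.0],
                                [-1.0, 1.0]])
# 3) a sample_set object for the input space
input_obj_as_set = sample.sample_set(input_dim)
input_obj_as_set.set_domain(input_obj_as_domain)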