def find_good_sets(grad_tensor, good_sets_prev, unique_indices,
                   num_optsets_return, cond_tol, volume):
    r"""
    Extend good sets of QoIs of size n - 1 to good sets of size n.

    .. todo::

        Use the idea we only know vectors are with 10% accuracy to guide
        inner_prod tol and condnum_tol.

    Every row of ``good_sets_prev`` is extended by each index of
    ``unique_indices`` that is larger than the largest index already in the
    row, so that no set is generated twice. The candidate sets are scattered
    over the processes, scored with ``calculate_avg_condnum`` (or
    ``calculate_avg_volume`` when ``volume`` is true) and kept as "good" when
    their score is below ``cond_tol``.

    :param grad_tensor: Gradient vectors at each centers in the parameter
        space :math:`\Lambda` for each QoI map.
    :type grad_tensor: :class:`np.ndarray` of shape
        (num_centers, num_qois, Ldim) where num_centers is the number of
        points in :math:`\Lambda` we have approximated the gradient vectors,
        num_qois is the total number of possible QoIs to choose from, Ldim
        is the dimension of :math:`\Lambda`.
    :param good_sets_prev: Good sets of QoIs of size n - 1.
    :type good_sets_prev: :class:`np.ndarray` of size
        (num_good_sets_prev, n - 1)
    :param unique_indices: Unique QoIs to consider.
    :type unique_indices: :class:`np.ndarray` of size (num_unique_qois, 1)
    :param int num_optsets_return: Number of best sets to return
    :param float cond_tol: Throw out all sets of QoIs with average condition
        number greater than this.
    :param boolean volume: If volume is True, use ``calculate_avg_volume``
        to determine optimal QoIs; any falsy value selects
        ``calculate_avg_condnum``.

    :rtype: tuple
    :returns: (good_sets, best_sets, optsingvals_tensor) where good_sets is
        an integer array of size (num_good_sets, n); best_sets has n + 1
        columns (score first, then the QoI indices) and at most
        num_optsets_return rows, because rows sharing the same score are
        collapsed by ``np.unique``; optsingvals_tensor has size
        (num_centers, n, num_optsets_return) and holds the values found by
        the calling process only (it is not gathered).
    """
    num_centers = grad_tensor.shape[0]
    num_qois_return = good_sets_prev.shape[1] + 1
    comm.Barrier()

    # Pick the scoring function once instead of testing inside the loop.
    if not volume:
        calculate_avg = calculate_avg_condnum
    else:
        calculate_avg = calculate_avg_volume

    # Initialize best sets and set all condition numbers large
    best_sets = np.zeros([num_optsets_return, num_qois_return + 1])
    best_sets[:, 0] = np.inf
    optsingvals_tensor = np.zeros(
        [num_centers, num_qois_return, num_optsets_return])

    # Rows are collected in a list and concatenated once; the leading row of
    # zeros is a placeholder that is stripped again before returning.
    good_rows = [np.zeros([1, num_qois_return])]
    count_qois = 0

    # For each good set of size n - 1, find the possible sets of size n and
    # compute the average condition number of each
    for prev_set in good_sets_prev:
        largest_ind = np.max(prev_set)
        # Find all possible combinations of QoIs that include this set
        if comm.rank == 0:
            inds_notin_set = util.fix_dimensions_vector_2darray(
                list(set(unique_indices) - set(prev_set)))
            # Choose only the QoI indices larger than the largest one in
            # the set so we do not repeat sets
            inds_notin_set = util.fix_dimensions_vector_2darray(
                inds_notin_set[inds_notin_set > largest_ind])
            qoi_combs = util.fix_dimensions_vector_2darray(np.append(
                np.tile(prev_set, [inds_notin_set.shape[0], 1]),
                inds_notin_set, axis=1))
            qoi_combs = np.array_split(qoi_combs, comm.size)
        else:
            qoi_combs = None

        # Scatter them throughout the processors
        qoi_combs = comm.scatter(qoi_combs, root=0)

        for qoi_comb in qoi_combs:
            count_qois += 1
            (current_condnum, singvals) = calculate_avg(grad_tensor,
                                                        qoi_comb)

            # If its a good set, add it to good_sets
            if current_condnum < cond_tol:
                good_rows.append(util.fix_dimensions_vector_2darray(
                    qoi_comb).transpose())

            # If the score beats the worst entry of best_sets, replace that
            # entry and re-sort; the singular values follow the same order.
            if current_condnum < best_sets[-1, 0]:
                best_sets[-1, :] = np.append(np.array([current_condnum]),
                                             qoi_comb)
                order = best_sets[:, 0].argsort()
                best_sets = best_sets[order]
                optsingvals_tensor[:, :, -1] = singvals
                optsingvals_tensor = optsingvals_tensor[:, :, order]

    good_sets = np.concatenate(good_rows, axis=0)

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and condition numbers from each processor
    good_sets = comm.gather(good_sets, root=0)
    best_sets = np.array(comm.gather(best_sets, root=0))
    count_qois = np.array(comm.gather(count_qois, root=0))

    # Find the num_optsets_return smallest condition numbers from all
    # processors
    if comm.rank == 0:
        # Organize the best sets
        best_sets = best_sets.reshape(num_optsets_return * comm.size,
                                      num_qois_return + 1)
        (_, uniq_inds_best) = np.unique(best_sets[:, 0], return_index=True)
        best_sets = best_sets[uniq_inds_best, :]
        best_sets = best_sets[best_sets[:, 0].argsort()]
        best_sets = best_sets[:num_optsets_return, :]

        # Organize the good sets: drop each process' placeholder row and
        # keep a single placeholder in front.
        good_sets = np.concatenate(
            [np.zeros([1, num_qois_return])] +
            [each[1:] for each in good_sets], axis=0)

        # Single-argument print calls emit the same text on Python 2 and 3.
        print('Possible sets of QoIs of size %i :  %s' % (
            good_sets.shape[1], np.sum(count_qois)))
        print('Good sets of QoIs of size %i :  %s' % (
            good_sets.shape[1], good_sets.shape[0] - 1))

    comm.Barrier()
    best_sets = comm.bcast(best_sets, root=0)
    good_sets = comm.bcast(good_sets, root=0)

    return (good_sets[1:].astype(int), best_sets, optsingvals_tensor)
def loadmat(save_file, lb_model=None, hot_start=None, num_chains=None):
    """
    Loads data from ``save_file`` into a
    :class:`~bet.sampling.adaptiveSampling.sampler` object.

    :param string save_file: file name
    :param callable lb_model: runs the model at a given set of parameter
        samples, (N, ndim), and returns data (N, mdim)
    :param int hot_start: Flag whether or not hot start the sampling
        chains from a previous set of chains. Note that ``num_chains`` must
        be the same, but ``num_chains_pproc`` need not be the same. 0 -
        cold start, 1 - hot start from uncompleted run, 2 - hot start from
        finished run. ``None`` is treated as 1.
    :param int num_chains: total number of chains of samples; read from the
        save file when ``None``

    :rtype: tuple of (:class:`bet.sampling.adaptiveSampling.sampler`,
        :class:`bet.sample.discretization`, :class:`numpy.ndarray`,
        :class:`numpy.ndarray`)
    :returns: (``sampler``, ``discretization``, ``all_step_ratios``,
        ``kern_old``)

    """
    print(hot_start)
    if hot_start is None:
        hot_start = 1
    # NOTE(review): nothing below handles hot_start == 0, so chain_length,
    # disc, etc. are never assigned on that path -- confirm callers never
    # pass 0 here.

    # LOAD FILES
    if hot_start == 1:  # HOT START FROM PARTIAL RUN
        if comm.rank == 0:
            logging.info("HOT START from partial run")
        # Find and open save files
        save_dir = os.path.dirname(save_file)
        base_name = os.path.basename(save_file)
        mdat_files = glob.glob(os.path.join(save_dir,
                                            "proc*_{}".format(base_name)))
        if len(mdat_files) > 0:
            tmp_mdat = sio.loadmat(mdat_files[0])
        else:
            tmp_mdat = sio.loadmat(save_file)
        if num_chains is None:
            num_chains = np.squeeze(tmp_mdat['num_chains'])
        # Floor division: these counts are used as array shapes below.
        num_chains_pproc = num_chains // comm.size

        if len(mdat_files) == 0:
            logging.info("HOT START using serial file")
            # tmp_mdat was read from save_file just above.
            mdat = tmp_mdat
            disc = sample.load_discretization(save_file)
            kern_old = np.squeeze(mdat['kern_old'])
            all_step_ratios = np.squeeze(mdat['step_ratios'])
            chain_length = disc.check_nums() // num_chains
            if all_step_ratios.shape == (num_chains, chain_length):
                msg = "Serial file, from completed"
                msg += " run updating hot_start"
                logging.info(msg)
                hot_start = 2
            # reshape if parallel
            if comm.size > 1:
                temp_input = np.reshape(
                    disc._input_sample_set.get_values(),
                    (num_chains, chain_length, -1), 'F')
                temp_output = np.reshape(
                    disc._output_sample_set.get_values(),
                    (num_chains, chain_length, -1), 'F')
                all_step_ratios = np.reshape(all_step_ratios,
                                             (num_chains, -1), 'F')
        elif hot_start == 1 and len(mdat_files) == comm.size:
            logging.info("HOT START using parallel files (same nproc)")
            # if the number of processors is the same then set mdat to
            # be the one with the matching processor number (doesn't
            # really matter)
            # NOTE(review): chain_length is not assigned on this path although
            # it is used when the sampler is built at the end; kern_old and
            # step_ratios also come from the first globbed file rather than
            # from mdat_files[comm.rank] -- confirm intended behavior.
            disc = sample.load_discretization(mdat_files[comm.rank])
            kern_old = np.squeeze(tmp_mdat['kern_old'])
            all_step_ratios = np.squeeze(tmp_mdat['step_ratios'])
        elif hot_start == 1 and len(mdat_files) != comm.size:
            logging.info("HOT START using parallel files (diff nproc)")
            # Hand every process a contiguous chunk of the old files. The
            # list handed to scatter must hold exactly comm.size entries, so
            # the file names are grouped first.
            file_chunks = [[str(name) for name in chunk] for chunk in
                           np.array_split(mdat_files, comm.size)]
            mdat_files_local = comm.scatter(file_chunks)
            mdat_local = [sio.loadmat(m) for m in mdat_files_local]
            disc_local = [sample.load_discretization(m) for m in
                          mdat_files_local]
            mdat_list = comm.allgather(mdat_local)
            disc_list = comm.allgather(disc_local)
            mdat_global = []
            disc_global = []
            # instead of a list of lists, create a list of mdat
            for mlist, dlist in zip(mdat_list, disc_list):
                mdat_global.extend(mlist)
                disc_global.extend(dlist)
            # get num_proc and num_chains_pproc for previous run; the
            # previous run wrote one file per process.
            old_num_proc = max((len(mdat_global), 1))
            old_num_chains_pproc = num_chains // old_num_proc
            # get batch size and/or number of dimensions
            chain_length = (disc_global[0].check_nums() //
                            old_num_chains_pproc)
            disc = disc_global[0].copy()
            # create lists of local data
            temp_input = []
            temp_output = []
            all_step_ratios = []
            kern_old = []
            # RESHAPE old_num_chains_pproc, chain_length(or batch), dim.
            # Pair every old mdat with its own discretization from the
            # gathered (global) lists.
            for mdat, old_disc in zip(mdat_global, disc_global):
                temp_input.append(np.reshape(
                    old_disc._input_sample_set.get_values_local(),
                    (old_num_chains_pproc, chain_length, -1), 'F'))
                temp_output.append(np.reshape(
                    old_disc._output_sample_set.get_values_local(),
                    (old_num_chains_pproc, chain_length, -1), 'F'))
                all_step_ratios.append(np.reshape(
                    mdat['step_ratios'],
                    (old_num_chains_pproc, chain_length, -1), 'F'))
                kern_old.append(np.reshape(
                    mdat['kern_old'], (old_num_chains_pproc,), 'F'))
            # turn into arrays
            temp_input = np.concatenate(temp_input)
            temp_output = np.concatenate(temp_output)
            all_step_ratios = np.concatenate(all_step_ratios)
            kern_old = np.concatenate(kern_old)

    if hot_start == 2:  # HOT START FROM COMPLETED RUN:
        if comm.rank == 0:
            logging.info("HOT START from completed run")
        mdat = sio.loadmat(save_file)
        if num_chains is None:
            num_chains = np.squeeze(mdat['num_chains'])
        num_chains_pproc = num_chains // comm.size
        disc = sample.load_discretization(save_file)
        kern_old = np.squeeze(mdat['kern_old'])
        all_step_ratios = np.squeeze(mdat['step_ratios'])
        chain_length = disc.check_nums() // num_chains
        # reshape if parallel
        if comm.size > 1:
            temp_input = np.reshape(
                disc._input_sample_set.get_values(),
                (num_chains, chain_length, -1), 'F')
            temp_output = np.reshape(
                disc._output_sample_set.get_values(),
                (num_chains, chain_length, -1), 'F')
            all_step_ratios = np.reshape(all_step_ratios,
                                         (num_chains, chain_length), 'F')

    # SPLIT DATA IF NECESSARY
    if comm.size > 1 and (hot_start == 2 or (
            hot_start == 1 and len(mdat_files) != comm.size)):
        # Use split to split along num_chains and set *._values_local
        disc._input_sample_set.set_values_local(np.reshape(
            np.split(temp_input, comm.size, 0)[comm.rank],
            (num_chains_pproc*chain_length, -1), 'F'))
        disc._output_sample_set.set_values_local(np.reshape(
            np.split(temp_output, comm.size, 0)[comm.rank],
            (num_chains_pproc*chain_length, -1), 'F'))
        all_step_ratios = np.reshape(
            np.split(all_step_ratios, comm.size, 0)[comm.rank],
            (num_chains_pproc*chain_length,), 'F')
        kern_old = np.reshape(
            np.split(kern_old, comm.size, 0)[comm.rank],
            (num_chains_pproc,), 'F')
    else:
        all_step_ratios = np.reshape(all_step_ratios, (-1,), 'F')

    print("%s %s %s" % (chain_length*num_chains, chain_length, lb_model))
    new_sampler = sampler(chain_length*num_chains, chain_length, lb_model)
    return (new_sampler, disc, all_step_ratios, kern_old)
def chooseOptQoIs_verbose(grad_tensor, qoiIndices=None, num_qois_return=None,
                          num_optsets_return=None, inner_prod_tol=1.0,
                          volume=False, remove_zeros=True):
    r"""
    Given gradient vectors at some points (centers) in the parameter space,
    a set of QoIs to choose from, and the number of desired QoIs to return,
    this method returns the ``num_optsets_return`` best sets of QoIs with
    respect to either the value returned by ``calculate_avg_condnum`` or,
    when ``volume`` is true, the value returned by ``calculate_avg_volume``.
    Smaller values are better. This method is brute force, i.e., if the
    method is given 10,000 QoIs and told to return the N best sets of 3, it
    will check all 10,000 choose 3 possible sets. See chooseOptQoIs_large
    for a less computationally expensive approach.

    :param grad_tensor: Gradient vectors at each point of interest in the
        parameter space :math:`\Lambda` for each QoI map.
    :type grad_tensor: :class:`np.ndarray` of shape
        (num_centers, num_qois, Lambda_dim) where num_centers is the number
        of points in :math:`\Lambda` we have approximated the gradient
        vectors and num_qois is the total number of possible QoIs to choose
        from
    :param qoiIndices: Set of QoIs to consider from grad_tensor. Default is
        range(0, grad_tensor.shape[1])
    :type qoiIndices: :class:`np.ndarray` of size (1, num QoIs to consider)
    :param int num_qois_return: Number of desired QoIs to use in the
        inverse problem. Default is Lambda_dim
    :param int num_optsets_return: Number of best sets to return.
        Default is 10
    :param float inner_prod_tol: tolerance forwarded to
        ``find_unique_vecs``
    :param boolean volume: If volume is True, use ``calculate_avg_volume``
        to determine optimal QoIs; any falsy value selects
        ``calculate_avg_condnum``.
    :param boolean remove_zeros: forwarded to ``find_unique_vecs``

    :rtype: tuple
    :returns: (condnum_indices_mat, optsingvals) where condnum_indices_mat
        has shape (num_optsets_return, num_qois_return+1), score in column
        0 followed by the QoI indices, and optsingvals has shape
        (num_centers, num_qois_return, num_optsets_return)
    """
    num_centers = grad_tensor.shape[0]
    Lambda_dim = grad_tensor.shape[2]
    if qoiIndices is None:
        qoiIndices = range(0, grad_tensor.shape[1])
    if num_qois_return is None:
        num_qois_return = Lambda_dim
    if num_optsets_return is None:
        num_optsets_return = 10

    qoiIndices = find_unique_vecs(grad_tensor, inner_prod_tol, qoiIndices,
                                  remove_zeros)

    # Find all posible combinations of QoIs
    if comm.rank == 0:
        qoi_combs = np.array(
            list(combinations(list(qoiIndices), num_qois_return)))
        # Single-argument print emits the same text on Python 2 and 3.
        print('Possible sets of QoIs :  %s' % qoi_combs.shape[0])
        qoi_combs = np.array_split(qoi_combs, comm.size)
    else:
        qoi_combs = None

    # Scatter them throughout the processors
    qoi_combs = comm.scatter(qoi_combs, root=0)

    if not volume:
        calculate_avg = calculate_avg_condnum
    else:
        calculate_avg = calculate_avg_volume

    # For each combination, compute the score and keep the sets with the
    # smallest scores. The last row always holds the current worst entry.
    condnum_indices_mat = np.zeros([num_optsets_return, num_qois_return + 1])
    condnum_indices_mat[:, 0] = np.inf
    optsingvals_tensor = np.zeros(
        [num_centers, num_qois_return, num_optsets_return])
    for qoi_comb in qoi_combs:
        (current_condnum, singvals) = calculate_avg(grad_tensor, qoi_comb)

        if current_condnum < condnum_indices_mat[-1, 0]:
            condnum_indices_mat[-1, :] = np.append(
                np.array([current_condnum]), qoi_comb)
            order = condnum_indices_mat[:, 0].argsort()
            condnum_indices_mat = condnum_indices_mat[order]

            optsingvals_tensor[:, :, -1] = singvals
            optsingvals_tensor = optsingvals_tensor[:, :, order]

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and condition numbers from each processor
    gathered_mats = comm.gather(condnum_indices_mat, root=0)
    gathered_singvals = comm.gather(optsingvals_tensor, root=0)

    # Find the num_optsets_return smallest condition numbers from all
    # processors
    if comm.rank == 0:
        # Stack the per-process tables row-wise and the per-process tensors
        # along the set axis so that row r and slice [:, :, r] describe the
        # same set. (A plain reshape of the stacked 4-D array would mix
        # centers, QoIs and sets whenever comm.size > 1.)
        condnum_indices_mat = np.concatenate(gathered_mats, axis=0)
        optsingvals_tensor = np.concatenate(gathered_singvals, axis=2)

        order = condnum_indices_mat[:, 0].argsort()
        condnum_indices_mat = condnum_indices_mat[order]
        condnum_indices_mat = condnum_indices_mat[:num_optsets_return, :]

        optsingvals_tensor = optsingvals_tensor[:, :, order]
        optsingvals_tensor = optsingvals_tensor[:, :, :num_optsets_return]

    condnum_indices_mat = comm.bcast(condnum_indices_mat, root=0)
    optsingvals_tensor = comm.bcast(optsingvals_tensor, root=0)

    return (condnum_indices_mat, optsingvals_tensor)
def find_good_sets(input_set, good_sets_prev, unique_indices,
                   num_optsets_return, measskew_tol, measure):
    r"""
    Extend good sets of QoIs of size (n - 1) to good sets of size n.

    .. todo::

        Use the idea we only know vectors are with 10% accuracy to guide
        inner_prod tol and skewness_tol.

    Every row of ``good_sets_prev`` is extended by each index of
    ``unique_indices`` that is larger than the largest index already in the
    row, so that no set is generated twice. The candidate sets are scattered
    over the processes, scored with ``calculate_avg_skewness`` (or
    ``calculate_avg_measure`` when ``measure`` is true) and kept as "good"
    when their score is below ``measskew_tol``.

    :param input_set: The input sample set. Make sure the attribute
        _jacobians is not None.
    :type input_set: :class:`~bet.sample.sample_set`
    :param good_sets_prev: Good sets of QoIs of size n - 1.
    :type good_sets_prev: :class:`np.ndarray` of size
        (num_good_sets_prev, n - 1)
    :param unique_indices: Unique QoIs to consider.
    :type unique_indices: :class:`np.ndarray` of size (num_unique_qois, 1)
    :param int num_optsets_return: Number of best sets to return
    :param float measskew_tol: Throw out all sets of QoIs with average
        measure(skewness) number greater than this.
    :param boolean measure: If measure is True, use
        ``calculate_avg_measure`` to determine optimal QoIs, else use
        ``calculate_avg_skewness`` (any falsy value, including
        ``numpy.bool_`` False, selects the skewness).

    :rtype: tuple
    :returns: (good_sets, best_sets, optsingvals_tensor) where good_sets is
        an integer array of size (num_good_sets, n); best_sets has n + 1
        columns (score first, then the QoI indices) and at most
        num_optsets_return rows, because rows sharing the same score are
        collapsed by ``np.unique``; optsingvals_tensor has size
        (num_centers, n, num_optsets_return) and holds the values found by
        the calling process only (it is not gathered).
    :raises ValueError: if ``input_set._jacobians`` is None

    """
    if input_set._jacobians is None:
        raise ValueError("You must have jacobians to use this method.")
    num_centers = input_set._jacobians.shape[0]
    num_qois_return = good_sets_prev.shape[1] + 1
    comm.Barrier()

    # Pick the scoring function once instead of testing inside the loop.
    if not measure:
        calculate_avg = calculate_avg_skewness
    else:
        calculate_avg = calculate_avg_measure

    # Initialize best sets and set all skewness values large
    best_sets = np.zeros([num_optsets_return, num_qois_return + 1])
    best_sets[:, 0] = np.inf
    optsingvals_tensor = np.zeros(
        [num_centers, num_qois_return, num_optsets_return])

    # Rows are collected in a list and concatenated once; the leading row of
    # zeros is a placeholder that is stripped again before returning.
    good_rows = [np.zeros([1, num_qois_return])]
    count_qois = 0

    # For each good set of size (n - 1), find the possible sets of size n and
    # compute the average skewness of each
    for prev_set in good_sets_prev:
        largest_ind = np.max(prev_set)
        # Find all possible combinations of QoIs that include this set of
        # (n - 1)
        if comm.rank == 0:
            inds_notin_set = util.fix_dimensions_vector_2darray(
                list(set(unique_indices) - set(prev_set)))
            # Choose only the QoI indices larger than the largest one in
            # the set so we do not repeat sets
            inds_notin_set = util.fix_dimensions_vector_2darray(
                inds_notin_set[inds_notin_set > largest_ind])
            qoi_combs = util.fix_dimensions_vector_2darray(np.append(
                np.tile(prev_set, [inds_notin_set.shape[0], 1]),
                inds_notin_set, axis=1))
            qoi_combs = np.array_split(qoi_combs, comm.size)
        else:
            qoi_combs = None

        # Scatter them throughout the processors
        qoi_combs = comm.scatter(qoi_combs, root=0)

        for qoi_comb in qoi_combs:
            count_qois += 1
            (current_measskew, singvals) = calculate_avg(input_set,
                                                         qoi_comb)

            # If its a good set, add it to good_sets
            if current_measskew < measskew_tol:
                good_rows.append(util.fix_dimensions_vector_2darray(
                    qoi_comb).transpose())

            # If the score beats the worst entry of best_sets, replace that
            # entry and re-sort; the singular values follow the same order.
            if current_measskew < best_sets[-1, 0]:
                best_sets[-1, :] = np.append(np.array([current_measskew]),
                                             qoi_comb)
                order = best_sets[:, 0].argsort()
                best_sets = best_sets[order]
                optsingvals_tensor[:, :, -1] = singvals
                optsingvals_tensor = optsingvals_tensor[:, :, order]

    good_sets = np.concatenate(good_rows, axis=0)

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and skewness values from each processor
    good_sets = comm.gather(good_sets, root=0)
    best_sets = np.array(comm.gather(best_sets, root=0))
    count_qois = np.array(comm.gather(count_qois, root=0))

    # Find the num_optsets_return smallest skewness from all processors
    if comm.rank == 0:
        # Organize the best sets
        best_sets = best_sets.reshape(num_optsets_return * comm.size,
                                      num_qois_return + 1)
        (_, uniq_inds_best) = np.unique(best_sets[:, 0], return_index=True)
        best_sets = best_sets[uniq_inds_best, :]
        best_sets = best_sets[best_sets[:, 0].argsort()]
        best_sets = best_sets[:num_optsets_return, :]

        # Organize the good sets: drop each process' placeholder row and
        # keep a single placeholder in front.
        good_sets = np.concatenate(
            [np.zeros([1, num_qois_return])] +
            [each[1:] for each in good_sets], axis=0)

        logging.info('Possible sets of QoIs of size {} : {}'.format(
            good_sets.shape[1], np.sum(count_qois)))
        logging.info('Good sets of QoIs of size {} : {}'.format(
            good_sets.shape[1], good_sets.shape[0] - 1))

    comm.Barrier()
    best_sets = comm.bcast(best_sets, root=0)
    good_sets = comm.bcast(good_sets, root=0)

    return (good_sets[1:].astype(int), best_sets, optsingvals_tensor)
def chooseOptQoIs_verbose(grad_tensor, qoiIndices=None, num_qois_return=None,
                          num_optsets_return=None):
    r"""
    Given gradient vectors at some points (centers) in the parameter space,
    a set of QoIs to choose from, and the number of desired QoIs to return,
    this method returns the sets of QoIs with the smallest average condition
    number (ratio of largest to smallest singular value of the matrix of
    gradient vectors, averaged over the centers). Also a tensor that holds
    the singular values of those matrices at each center is returned.

    A center whose smallest singular value is exactly zero contributes a
    fixed penalty of 1e9 instead of a ratio. Rows of the result that were
    never filled keep the initial score 1e11, and a set is only recorded
    when its score is below the current worst recorded score.

    :param grad_tensor: Gradient vectors at each point of interest in the
        parameter space :math:`\Lambda` for each QoI map.
    :type grad_tensor: :class:`np.ndarray` of shape
        (num_centers, num_qois, Lambda_dim) where num_centers is the number
        of points in :math:`\Lambda` we have approximated the gradient
        vectors and num_qois is the total number of possible QoIs to choose
        from
    :param qoiIndices: Set of QoIs to consider from grad_tensor. Default is
        range(0, grad_tensor.shape[1])
    :type qoiIndices: :class:`np.ndarray` of size (1, num QoIs to consider)
    :param int num_qois_return: Number of desired QoIs to use in the
        inverse problem. Default is Lambda_dim
    :param int num_optsets_return: Number of best sets to return.
        Default is 10

    :rtype: tuple
    :returns: (condnum_indices_mat, optsingvals) where condnum_indices_mat
        has shape (num_optsets_return, num_qois_return+1), condition number
        in column 0 followed by the QoI indices, and optsingvals has shape
        (num_centers, num_qois_return, num_optsets_return)
    """
    num_centers = grad_tensor.shape[0]
    Lambda_dim = grad_tensor.shape[2]
    if qoiIndices is None:
        qoiIndices = range(0, grad_tensor.shape[1])
    if num_qois_return is None:
        num_qois_return = Lambda_dim
    if num_optsets_return is None:
        num_optsets_return = 10

    # Find all posible combinations of QoIs
    if comm.rank == 0:
        qoi_combs = np.array(
            list(combinations(list(qoiIndices), num_qois_return)))
        # Single-argument print emits the same text on Python 2 and 3.
        print("Possible sets of QoIs :  %s" % qoi_combs.shape[0])
        qoi_combs = np.array_split(qoi_combs, comm.size)
    else:
        qoi_combs = None

    # Scatter them throughout the processors
    qoi_combs = comm.scatter(qoi_combs, root=0)

    # For each combination, check the skewness and keep the sets
    # that have the best skewness, i.e., smallest condition number.
    # The last row always holds the current worst entry.
    condnum_indices_mat = np.zeros([num_optsets_return, num_qois_return + 1])
    condnum_indices_mat[:, 0] = 1e11
    optsingvals_tensor = np.zeros(
        [num_centers, num_qois_return, num_optsets_return])
    for qoi_comb in qoi_combs:
        # One row of singular values per center.
        singvals = np.linalg.svd(grad_tensor[:, qoi_comb, :],
                                 compute_uv=False)

        # Find the centers that have at least one zero singular value
        indz = singvals[:, -1] == 0
        indnz = singvals[:, -1] != 0

        ratio_sum = np.sum(singvals[indnz, 0] / singvals[indnz, -1], axis=0)
        current_condnum = (ratio_sum + 1e9 * np.sum(indz)) / \
            singvals.shape[0]

        if current_condnum < condnum_indices_mat[-1, 0]:
            condnum_indices_mat[-1, :] = np.append(
                np.array([current_condnum]), qoi_comb)
            order = condnum_indices_mat[:, 0].argsort()
            condnum_indices_mat = condnum_indices_mat[order]

            optsingvals_tensor[:, :, -1] = singvals
            optsingvals_tensor = optsingvals_tensor[:, :, order]

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and condition numbers from each processor
    gathered_mats = comm.gather(condnum_indices_mat, root=0)
    gathered_singvals = comm.gather(optsingvals_tensor, root=0)

    # Find the num_optsets_return smallest condition numbers from all
    # processors
    if comm.rank == 0:
        # Stack the per-process tables row-wise and the per-process tensors
        # along the set axis so that row r and slice [:, :, r] describe the
        # same set. (A plain reshape of the stacked 4-D array would mix
        # centers, QoIs and sets whenever comm.size > 1.)
        condnum_indices_mat = np.concatenate(gathered_mats, axis=0)
        optsingvals_tensor = np.concatenate(gathered_singvals, axis=2)

        order = condnum_indices_mat[:, 0].argsort()
        condnum_indices_mat = condnum_indices_mat[order]
        condnum_indices_mat = condnum_indices_mat[:num_optsets_return, :]

        optsingvals_tensor = optsingvals_tensor[:, :, order]
        optsingvals_tensor = optsingvals_tensor[:, :, :num_optsets_return]

    condnum_indices_mat = comm.bcast(condnum_indices_mat, root=0)
    optsingvals_tensor = comm.bcast(optsingvals_tensor, root=0)

    return (condnum_indices_mat, optsingvals_tensor)
def find_good_sets(grad_tensor, good_sets_prev, unique_indices,
                   num_optsets_return, cond_tol, volume):
    r"""
    Extend good sets of QoIs of size n - 1 to good sets of size n.

    .. todo::

        Use the idea we only know vectors are with 10% accuracy to guide
        inner_prod tol and condnum_tol.

    Every row of ``good_sets_prev`` is extended by each index of
    ``unique_indices`` that is larger than the largest index already in the
    row, so that no set is generated twice. The candidate sets are scattered
    over the processes, scored with ``calculate_avg_condnum`` (or
    ``calculate_avg_volume`` when ``volume`` is true) and kept as "good" when
    their score is below ``cond_tol``.

    :param grad_tensor: Gradient vectors at each centers in the parameter
        space :math:`\Lambda` for each QoI map.
    :type grad_tensor: :class:`np.ndarray` of shape
        (num_centers, num_qois, Ldim) where num_centers is the number of
        points in :math:`\Lambda` we have approximated the gradient vectors,
        num_qois is the total number of possible QoIs to choose from, Ldim
        is the dimension of :math:`\Lambda`.
    :param good_sets_prev: Good sets of QoIs of size n - 1.
    :type good_sets_prev: :class:`np.ndarray` of size
        (num_good_sets_prev, n - 1)
    :param unique_indices: Unique QoIs to consider.
    :type unique_indices: :class:`np.ndarray` of size (num_unique_qois, 1)
    :param int num_optsets_return: Number of best sets to return
    :param float cond_tol: Throw out all sets of QoIs with average condition
        number greater than this.
    :param boolean volume: If volume is True, use ``calculate_avg_volume``
        to determine optimal QoIs; any falsy value selects
        ``calculate_avg_condnum``.

    :rtype: tuple
    :returns: (good_sets, best_sets, optsingvals_tensor) where good_sets is
        an integer array of size (num_good_sets, n); best_sets has n + 1
        columns (score first, then the QoI indices) and at most
        num_optsets_return rows, because rows sharing the same score
        (including unfilled rows, which keep the initial score 1E99) are
        collapsed by ``np.unique``; optsingvals_tensor has size
        (num_centers, n, num_optsets_return) and holds the values found by
        the calling process only (it is not gathered).
    """
    num_centers = grad_tensor.shape[0]
    num_qois_return = good_sets_prev.shape[1] + 1
    comm.Barrier()

    # Pick the scoring function once instead of testing inside the loop.
    if not volume:
        calculate_avg = calculate_avg_condnum
    else:
        calculate_avg = calculate_avg_volume

    # Initialize best sets and set all condition numbers large
    best_sets = np.zeros([num_optsets_return, num_qois_return + 1])
    best_sets[:, 0] = 1E99
    optsingvals_tensor = np.zeros(
        [num_centers, num_qois_return, num_optsets_return])

    # Rows are collected in a list and concatenated once; the leading row of
    # zeros is a placeholder that is stripped again before returning.
    good_rows = [np.zeros([1, num_qois_return])]
    count_qois = 0

    # For each good set of size n - 1, find the possible sets of size n and
    # compute the average condition number of each
    for prev_set in good_sets_prev:
        largest_ind = np.max(prev_set)
        # Find all possible combinations of QoIs that include this set
        if comm.rank == 0:
            inds_notin_set = util.fix_dimensions_vector_2darray(
                list(set(unique_indices) - set(prev_set)))
            # Choose only the QoI indices larger than the largest one in
            # the set so we do not repeat sets
            inds_notin_set = util.fix_dimensions_vector_2darray(
                inds_notin_set[inds_notin_set > largest_ind])
            qoi_combs = util.fix_dimensions_vector_2darray(np.append(
                np.tile(prev_set, [inds_notin_set.shape[0], 1]),
                inds_notin_set, axis=1))
            qoi_combs = np.array_split(qoi_combs, comm.size)
        else:
            qoi_combs = None

        # Scatter them throughout the processors
        qoi_combs = comm.scatter(qoi_combs, root=0)

        for qoi_comb in qoi_combs:
            count_qois += 1
            (current_condnum, singvals) = calculate_avg(grad_tensor,
                                                        qoi_comb)

            # If its a good set, add it to good_sets
            if current_condnum < cond_tol:
                good_rows.append(util.fix_dimensions_vector_2darray(
                    qoi_comb).transpose())

            # If the score beats the worst entry of best_sets, replace that
            # entry and re-sort; the singular values follow the same order.
            if current_condnum < best_sets[-1, 0]:
                best_sets[-1, :] = np.append(np.array([current_condnum]),
                                             qoi_comb)
                order = best_sets[:, 0].argsort()
                best_sets = best_sets[order]
                optsingvals_tensor[:, :, -1] = singvals
                optsingvals_tensor = optsingvals_tensor[:, :, order]

    good_sets = np.concatenate(good_rows, axis=0)

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and condition numbers from each processor
    good_sets = comm.gather(good_sets, root=0)
    best_sets = np.array(comm.gather(best_sets, root=0))
    count_qois = np.array(comm.gather(count_qois, root=0))

    # Find the num_optsets_return smallest condition numbers from all
    # processors
    if comm.rank == 0:
        # Organize the best sets
        best_sets = best_sets.reshape(num_optsets_return * comm.size,
                                      num_qois_return + 1)
        (_, uniq_inds_best) = np.unique(best_sets[:, 0], return_index=True)
        best_sets = best_sets[uniq_inds_best, :]
        best_sets = best_sets[best_sets[:, 0].argsort()]
        best_sets = best_sets[:num_optsets_return, :]

        # Organize the good sets: drop each process' placeholder row and
        # keep a single placeholder in front.
        good_sets = np.concatenate(
            [np.zeros([1, num_qois_return])] +
            [each[1:] for each in good_sets], axis=0)

        # Single-argument print calls emit the same text on Python 2 and 3.
        print('Possible sets of QoIs of size %i :  %s' % (
            good_sets.shape[1], np.sum(count_qois)))
        print('Good sets of QoIs of size %i :  %s' % (
            good_sets.shape[1], good_sets.shape[0] - 1))

    comm.Barrier()
    best_sets = comm.bcast(best_sets, root=0)
    good_sets = comm.bcast(good_sets, root=0)

    return (good_sets[1:].astype(int), best_sets, optsingvals_tensor)
def chooseOptQoIs_verbose(input_set, qoiIndices=None, num_qois_return=None,
                          num_optsets_return=None, inner_prod_tol=1.0,
                          measure=False, remove_zeros=True):
    r"""
    Given gradient vectors at some points (centers) in the parameter space,
    a set of QoIs to choose from, and the number of desired QoIs to return,
    this method returns the ``num_optsets_return`` best sets of QoIs with
    respect to either the value returned by ``calculate_avg_skewness`` or,
    when ``measure`` is true, the value returned by
    ``calculate_avg_measure``. Smaller values are better. This method is
    brute force, i.e., if the method is given 10,000 QoIs and told to return
    the N best sets of 3, it will check all 10,000 choose 3 possible sets.
    See chooseOptQoIs_large for a less computationally expensive approach.

    :param input_set: The input sample set. Make sure the attribute
        _jacobians is not None
    :type input_set: :class:`~bet.sample.sample_set`
    :param qoiIndices: Set of QoIs to consider. Default is
        range(0, input_set._jacobians.shape[1])
    :type qoiIndices: :class:`np.ndarray` of size (1, num QoIs to consider)
    :param int num_qois_return: Number of desired QoIs to use in the
        inverse problem. Default is input_dim
    :param int num_optsets_return: Number of best sets to return.
        Default is 10
    :param float inner_prod_tol: tolerance forwarded to
        ``find_unique_vecs``
    :param boolean measure: If measure is True, use
        ``calculate_avg_measure`` to determine optimal QoIs, else use
        ``calculate_avg_skewness``
    :param boolean remove_zeros: forwarded to ``find_unique_vecs``

    :rtype: tuple
    :returns: (measure_skewness_indices_mat, optsingvals_tensor) where
        measure_skewness_indices_mat has shape
        (num_optsets_return, num_qois_return + 1), score in column 0
        followed by the QoI indices, and optsingvals_tensor has shape
        (num_centers, num_qois_return, num_optsets_return)
    :raises ValueError: if ``input_set._jacobians`` is None

    """
    G = input_set._jacobians
    if G is None:
        raise ValueError("You must have jacobians to use this method.")
    input_dim = input_set._dim

    num_centers = G.shape[0]

    if qoiIndices is None:
        qoiIndices = range(0, G.shape[1])
    if num_qois_return is None:
        num_qois_return = input_dim
    if num_optsets_return is None:
        num_optsets_return = 10

    # Reduce the candidate QoIs with find_unique_vecs
    qoiIndices = find_unique_vecs(input_set, inner_prod_tol, qoiIndices,
                                  remove_zeros)

    # Find all posible combinations of QoIs
    if comm.rank == 0:
        qoi_combs = np.array(
            list(combinations(list(qoiIndices), num_qois_return)))
        logging.info('Possible sets of QoIs : {}'.format(qoi_combs.shape[0]))
        qoi_combs = np.array_split(qoi_combs, comm.size)
    else:
        qoi_combs = None

    # Scatter them throughout the processors
    qoi_combs = comm.scatter(qoi_combs, root=0)

    if not measure:
        calculate_avg = calculate_avg_skewness
    else:
        calculate_avg = calculate_avg_measure

    # For each combination, compute the score and keep the sets with the
    # smallest scores. The last row always holds the current worst entry.
    measure_skewness_indices_mat = np.zeros(
        [num_optsets_return, num_qois_return + 1])
    measure_skewness_indices_mat[:, 0] = np.inf
    optsingvals_tensor = np.zeros(
        [num_centers, num_qois_return, num_optsets_return])
    for qoi_comb in qoi_combs:
        (current_measskew, singvals) = calculate_avg(input_set, qoi_comb)

        if current_measskew < measure_skewness_indices_mat[-1, 0]:
            measure_skewness_indices_mat[-1, :] = np.append(
                np.array([current_measskew]), qoi_comb)
            order = measure_skewness_indices_mat[:, 0].argsort()
            measure_skewness_indices_mat = measure_skewness_indices_mat[order]

            optsingvals_tensor[:, :, -1] = singvals
            optsingvals_tensor = optsingvals_tensor[:, :, order]

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and skewness values from each processor
    gathered_mats = comm.gather(measure_skewness_indices_mat, root=0)
    gathered_singvals = comm.gather(optsingvals_tensor, root=0)

    # Find the num_optsets_return smallest skewness values from all
    # processors
    if comm.rank == 0:
        # Stack the per-process tables row-wise and the per-process tensors
        # along the set axis so that row r and slice [:, :, r] describe the
        # same set. (A plain reshape of the stacked 4-D array would mix
        # centers, QoIs and sets whenever comm.size > 1.)
        measure_skewness_indices_mat = np.concatenate(gathered_mats, axis=0)
        optsingvals_tensor = np.concatenate(gathered_singvals, axis=2)

        order = measure_skewness_indices_mat[:, 0].argsort()
        measure_skewness_indices_mat = measure_skewness_indices_mat[order]
        measure_skewness_indices_mat = measure_skewness_indices_mat[
            :num_optsets_return, :]

        optsingvals_tensor = optsingvals_tensor[:, :, order]
        optsingvals_tensor = optsingvals_tensor[:, :, :num_optsets_return]

    measure_skewness_indices_mat = comm.bcast(measure_skewness_indices_mat,
                                              root=0)
    optsingvals_tensor = comm.bcast(optsingvals_tensor, root=0)

    return (measure_skewness_indices_mat, optsingvals_tensor)
def chooseOptQoIs_verbose(grad_tensor, qoiIndices=None, num_qois_return=None,
                          num_optsets_return=None, inner_prod_tol=1.0,
                          volume=False, remove_zeros=True):
    r"""
    Brute-force search for the ``num_optsets_return`` best sets of QoIs.

    Given gradient vectors at some points (centers) in the parameter space,
    a set of QoIs to choose from, and the number of desired QoIs to return,
    every combination of ``num_qois_return`` QoIs is scored with
    ``calculate_avg_condnum`` (when ``volume`` is False) or
    ``calculate_avg_volume`` (otherwise), and the combinations with the
    smallest scores are kept.

    This method is brute force, i.e., if the method is given 10,000 QoIs and
    told to return the N best sets of 3, it will check all 10,000 choose 3
    possible sets.  See chooseOptQoIs_large for a less computationally
    expensive approach.

    The combinations are built on rank 0, scattered over the processors,
    scored locally, gathered back on rank 0, merged, and finally broadcast
    so that every rank returns the same result.

    :param grad_tensor: Gradient vectors at each point of interest in the
        parameter space :math:`\Lambda` for each QoI map.
    :type grad_tensor: :class:`np.ndarray` of shape (num_centers, num_qois,
        Lambda_dim) where num_centers is the number of points in
        :math:`\Lambda` we have approximated the gradient vectors and
        num_qois is the total number of possible QoIs to choose from
    :param qoiIndices: Set of QoIs to consider from grad_tensor.  Default is
        range(0, grad_tensor.shape[1])
    :type qoiIndices: :class:`np.ndarray` of size (1, num QoIs to consider)
    :param int num_qois_return: Number of desired QoIs to use in the
        inverse problem.  Default is Lambda_dim
    :param int num_optsets_return: Number of best sets to return.
        Default is 10
    :param float inner_prod_tol: Tolerance forwarded to
        ``find_unique_vecs``.
    :param boolean volume: If volume is True, use ``calculate_avg_volume``
        to determine optimal QoIs
    :param boolean remove_zeros: If True, ``find_unique_vecs`` will remove
        any QoIs that have a zero gradient vector at at least one point in
        :math:`\Lambda`.

    :rtype: tuple
    :returns: (condnum_indices_mat, optsingvals) where condnum_indices_mat
        has shape (num_optsets_return, num_qois_return+1) and optsingvals
        has shape (num_centers, num_qois_return, num_optsets_return).
        Column 0 of condnum_indices_mat holds the score and the remaining
        columns hold the QoI indices; rows that were never filled keep the
        score 1E99 and zero indices.

    """
    num_centers = grad_tensor.shape[0]
    Lambda_dim = grad_tensor.shape[2]
    if qoiIndices is None:
        qoiIndices = range(0, grad_tensor.shape[1])
    if num_qois_return is None:
        num_qois_return = Lambda_dim
    if num_optsets_return is None:
        num_optsets_return = 10

    qoiIndices = find_unique_vecs(grad_tensor, inner_prod_tol, qoiIndices,
                                  remove_zeros)

    # Find all possible combinations of QoIs (rank 0 only), one chunk per
    # processor
    if comm.rank == 0:
        qoi_combs = np.array(list(combinations(list(qoiIndices),
                                               num_qois_return)))
        # Single-argument print call: same text on Python 2 and Python 3.
        print('Possible sets of QoIs :  {}'.format(qoi_combs.shape[0]))
        qoi_combs = np.array_split(qoi_combs, comm.size)
    else:
        qoi_combs = None

    # Scatter them throughout the processors
    qoi_combs = comm.scatter(qoi_combs, root=0)

    # For each combination, compute the score and keep the sets with the
    # smallest scores.  The last row/slice is always the current worst
    # entry, so a better candidate overwrites it and both containers are
    # re-sorted with the same permutation.
    condnum_indices_mat = np.zeros([num_optsets_return, num_qois_return + 1])
    condnum_indices_mat[:, 0] = 1E99
    optsingvals_tensor = np.zeros([num_centers, num_qois_return,
                                   num_optsets_return])
    for qoi_comb in qoi_combs:
        # ``== False`` (not ``not volume``) is kept on purpose so that the
        # branch taken for unusual flag values is unchanged.
        if volume == False:
            (current_condnum, singvals) = calculate_avg_condnum(grad_tensor,
                                                                qoi_comb)
        else:
            (current_condnum, singvals) = calculate_avg_volume(grad_tensor,
                                                               qoi_comb)

        if current_condnum < condnum_indices_mat[-1, 0]:
            condnum_indices_mat[-1, :] = np.append(
                np.array([current_condnum]), qoi_comb)
            order = condnum_indices_mat[:, 0].argsort()
            condnum_indices_mat = condnum_indices_mat[order]

            optsingvals_tensor[:, :, -1] = singvals
            optsingvals_tensor = optsingvals_tensor[:, :, order]

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and scores from each processor
    gathered_condnums = comm.gather(condnum_indices_mat, root=0)
    gathered_singvals = comm.gather(optsingvals_tensor, root=0)

    # Find the num_optsets_return smallest scores from all processors
    if comm.rank == 0:
        # Stack the per-processor tables row-wise, and the singular values
        # along the "set" axis so that slice j of the tensor belongs to row
        # j of the table.  (Reshaping the stacked 4-D array directly would
        # scramble the entries when more than one processor is used.)
        condnum_indices_mat = np.concatenate(gathered_condnums, axis=0)
        optsingvals_tensor = np.concatenate(gathered_singvals, axis=2)

        order = condnum_indices_mat[:, 0].argsort()[:num_optsets_return]
        condnum_indices_mat = condnum_indices_mat[order]
        optsingvals_tensor = optsingvals_tensor[:, :, order]

    condnum_indices_mat = comm.bcast(condnum_indices_mat, root=0)
    optsingvals_tensor = comm.bcast(optsingvals_tensor, root=0)

    return (condnum_indices_mat, optsingvals_tensor)
def loadmat(save_file, lb_model=None, hot_start=None, num_chains=None):
    """
    Loads data from ``save_file`` into a
    :class:`~bet.sampling.adaptiveSampling.sampler` object.

    :param string save_file: file name
    :param callable lb_model: runs the model at a given set of parameter
        samples, (N, ndim), and returns data (N, mdim)
    :param int hot_start: Flag describing the run that produced the saved
        chains.  Note that ``num_chains`` must be the same, but
        ``num_chains_pproc`` need not be the same.
        1 - hot start from uncompleted run,
        2 - hot start from finished run.
        ``None`` is treated as 1.
    :param int num_chains: total number of chains of samples.  When None it
        is read from the ``num_chains`` entry of the saved file.

    :rtype: tuple of (:class:`bet.sampling.adaptiveSampling.sampler`,
        :class:`bet.sample.discretization`, :class:`numpy.ndarray`,
        :class:`numpy.ndarray`)
    :returns: (``sampler``, ``discretization``, ``all_step_ratios``,
        ``kern_old``)
    :raises ValueError: if ``hot_start`` is not None, 1 or 2 (there is
        nothing to load for a cold start).

    """
    logging.debug("loadmat called with hot_start=%s", hot_start)
    if hot_start is None:
        hot_start = 1
    if hot_start not in (1, 2):
        # Previously this fell through and failed later on an unassigned
        # local variable.
        raise ValueError("hot_start must be 1 (uncompleted run) or 2 "
                         "(finished run), got {!r}".format(hot_start))

    # LOAD FILES
    if hot_start == 1:  # HOT START FROM PARTIAL RUN
        if comm.rank == 0:
            logging.info("HOT START from partial run")
        # Find and open save files
        save_dir = os.path.dirname(save_file)
        base_name = os.path.basename(save_file)
        mdat_files = glob.glob(
            os.path.join(save_dir, "proc*_{}".format(base_name)))
        if len(mdat_files) > 0:
            tmp_mdat = sio.loadmat(mdat_files[0])
        else:
            tmp_mdat = sio.loadmat(save_file)
        if num_chains is None:
            num_chains = np.squeeze(tmp_mdat['num_chains'])
        # Floor division: these values are used as array dimensions.
        num_chains_pproc = num_chains // comm.size

        if len(mdat_files) == 0:
            logging.info("HOT START using serial file")
            # tmp_mdat was loaded from save_file just above.
            mdat = tmp_mdat
            disc = sample.load_discretization(save_file)
            kern_old = np.squeeze(mdat['kern_old'])
            all_step_ratios = np.squeeze(mdat['step_ratios'])
            chain_length = disc.check_nums() // num_chains
            if all_step_ratios.shape == (num_chains, chain_length):
                # A full (num_chains, chain_length) table of step ratios
                # is what the completed-run branch below expects.
                msg = "Serial file, from completed"
                msg += " run updating hot_start"
                logging.info(msg)
                hot_start = 2
            # reshape if parallel
            if comm.size > 1:
                temp_input = np.reshape(
                    disc._input_sample_set.get_values(),
                    (num_chains, chain_length, -1), 'F')
                temp_output = np.reshape(
                    disc._output_sample_set.get_values(),
                    (num_chains, chain_length, -1), 'F')
                all_step_ratios = np.reshape(all_step_ratios,
                                             (num_chains, -1), 'F')
        elif hot_start == 1 and len(mdat_files) == comm.size:
            logging.info("HOT START using parallel files (same nproc)")
            # if the number of processors is the same then set mdat to
            # be the one with the matching processor number (doesn't
            # really matter)
            # NOTE(review): kern_old/step_ratios are read from tmp_mdat
            # (mdat_files[0]) rather than this rank's file, and
            # chain_length is never assigned on this path although it is
            # used when the sampler is built below -- confirm intent.
            disc = sample.load_discretization(mdat_files[comm.rank])
            kern_old = np.squeeze(tmp_mdat['kern_old'])
            all_step_ratios = np.squeeze(tmp_mdat['step_ratios'])
        elif hot_start == 1 and len(mdat_files) != comm.size:
            logging.info("HOT START using parallel files (diff nproc)")
            # Determine how many processors the previous data used
            # otherwise gather the data from mdat and then scatter
            # among the processors and update mdat
            mdat_files_local = comm.scatter(mdat_files)
            mdat_local = [sio.loadmat(m) for m in mdat_files_local]
            disc_local = [sample.load_discretization(m)
                          for m in mdat_files_local]
            mdat_list = comm.allgather(mdat_local)
            disc_list = comm.allgather(disc_local)
            mdat_global = []
            disc_global = []
            # instead of a list of lists, create a list of mdat
            for mlist, dlist in zip(mdat_list, disc_list):
                mdat_global.extend(mlist)
                disc_global.extend(dlist)
            # get num_proc and num_chains_pproc for previous run
            # NOTE(review): len(mdat_list) is the length of the allgather
            # result, presumably the current number of ranks rather than
            # the previous run's -- confirm.
            old_num_proc = max((len(mdat_list), 1))
            old_num_chains_pproc = num_chains // old_num_proc
            # get batch size and/or number of dimensions
            chain_length = disc_global[0].check_nums() // \
                old_num_chains_pproc
            disc = disc_global[0].copy()
            # create lists of local data
            temp_input = []
            temp_output = []
            all_step_ratios = []
            kern_old = []
            # RESHAPE old_num_chains_pproc, chain_length(or batch), dim
            # Pair every loaded mat-file with its own discretization from
            # the *global* list (the rank-local list was zipped here
            # before, which mismatched and truncated the data).
            for mdat, old_disc in zip(mdat_global, disc_global):
                temp_input.append(np.reshape(
                    old_disc._input_sample_set.get_values_local(),
                    (old_num_chains_pproc, chain_length, -1), 'F'))
                temp_output.append(np.reshape(
                    old_disc._output_sample_set.get_values_local(),
                    (old_num_chains_pproc, chain_length, -1), 'F'))
                all_step_ratios.append(
                    np.reshape(mdat['step_ratios'],
                               (old_num_chains_pproc, chain_length, -1),
                               'F'))
                kern_old.append(
                    np.reshape(mdat['kern_old'],
                               (old_num_chains_pproc, ), 'F'))
            # turn into arrays
            temp_input = np.concatenate(temp_input)
            temp_output = np.concatenate(temp_output)
            all_step_ratios = np.concatenate(all_step_ratios)
            kern_old = np.concatenate(kern_old)

    if hot_start == 2:  # HOT START FROM COMPLETED RUN:
        if comm.rank == 0:
            logging.info("HOT START from completed run")
        mdat = sio.loadmat(save_file)
        if num_chains is None:
            num_chains = np.squeeze(mdat['num_chains'])
        num_chains_pproc = num_chains // comm.size
        disc = sample.load_discretization(save_file)
        kern_old = np.squeeze(mdat['kern_old'])
        all_step_ratios = np.squeeze(mdat['step_ratios'])
        chain_length = disc.check_nums() // num_chains
        # reshape if parallel
        if comm.size > 1:
            temp_input = np.reshape(
                disc._input_sample_set.get_values(),
                (num_chains, chain_length, -1), 'F')
            temp_output = np.reshape(
                disc._output_sample_set.get_values(),
                (num_chains, chain_length, -1), 'F')
            all_step_ratios = np.reshape(all_step_ratios,
                                         (num_chains, chain_length), 'F')

    # SPLIT DATA IF NECESSARY
    # (mdat_files only exists when hot_start == 1; the short-circuit below
    # keeps it from being evaluated otherwise.)
    if comm.size > 1 and (hot_start == 2 or (
            hot_start == 1 and len(mdat_files) != comm.size)):
        # Use split to split along num_chains and set *._values_local
        disc._input_sample_set.set_values_local(np.reshape(
            np.split(temp_input, comm.size, 0)[comm.rank],
            (num_chains_pproc * chain_length, -1), 'F'))
        disc._output_sample_set.set_values_local(np.reshape(
            np.split(temp_output, comm.size, 0)[comm.rank],
            (num_chains_pproc * chain_length, -1), 'F'))
        all_step_ratios = np.reshape(
            np.split(all_step_ratios, comm.size, 0)[comm.rank],
            (num_chains_pproc * chain_length, ), 'F')
        kern_old = np.reshape(
            np.split(kern_old, comm.size, 0)[comm.rank],
            (num_chains_pproc, ), 'F')
    else:
        all_step_ratios = np.reshape(all_step_ratios, (-1, ), 'F')

    logging.debug("loadmat sampler arguments: %s %s %s",
                  chain_length * num_chains, chain_length, lb_model)
    new_sampler = sampler(chain_length * num_chains, chain_length, lb_model)
    return (new_sampler, disc, all_step_ratios, kern_old)
def generalized_chains(self, param_min, param_max, t_set, kern,
                       savefile, initial_sample_type="random",
                       criterion='center', hot_start=0):
    """
    Basic adaptive sampling algorithm using generalized chains.

    On a cold start an initial batch of samples is drawn and split over
    the processors; on a hot start the chains saved in ``savefile`` (or in
    the per-processor ``proc<rank>_<savefile>`` files) are loaded and
    continued.  Each further batch steps every chain with ``t_set``, runs
    the model, lets ``kern`` propose a new step ratio (clipped to the
    transition set's min/max ratio) and checkpoints the accumulated
    arrays.

    :param param_min: minimum value for each parameter dimension
    :type param_min: :class:`numpy.ndarray` (ndim,)
    :param param_max: maximum value for each parameter dimension
    :type param_max: :class:`numpy.ndarray` (ndim,)
    :param t_set: method for creating new parameter steps using
        given a step size based on the parameter domain size
    :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set`
    :param kern: functional that acts on the data used to
        determine the proposed change to the ``step_size``
    :type kern: :class:`~bet.sampling.adaptiveSampling.kernel` object.
    :param string savefile: filename to save samples and data
    :param string initial_sample_type: type of initial sample random (or r),
        latin hypercube(lhs), or space-filling curve(TBD)
    :param string criterion: latin hypercube criterion see
        `PyDOE <http://pythonhosted.org/pyDOE/randomized.html>`_
    :param int hot_start: Flag whether or not hot start the sampling
        chains from a previous set of chains. Note that ``num_chains`` must
        be the same, but ``num_chains_pproc`` need not be the same. 0 -
        cold start, 1 - hot start from uncompleted run, 2 - hot
        start from finished run

    :rtype: tuple
    :returns: (``parameter_samples``, ``data_samples``,
        ``all_step_ratios``) where ``parameter_samples`` is np.ndarray of
        shape (num_samples, ndim), ``data_samples`` is np.ndarray of shape
        (num_samples, mdim), and ``all_step_ratios`` is np.ndarray of shape
        (num_chains, chain_length)

    """
    if comm.size > 1:
        # Per-processor checkpoint file: "proc<rank>_<basename>"
        psavefile = os.path.join(
            os.path.dirname(savefile),
            "proc{}_{}".format(comm.rank, os.path.basename(savefile)))

    # Initialize Nx1 vector Step_size = something reasonable (based on size
    # of domain and transition set type)
    # Calculate domain size
    param_left = np.repeat([param_min], self.num_chains_pproc, 0)
    param_right = np.repeat([param_max], self.num_chains_pproc, 0)
    param_width = param_right - param_left
    # Calculate step_size
    max_ratio = t_set.max_ratio
    min_ratio = t_set.min_ratio

    if not hot_start:
        step_ratio = t_set.init_ratio * np.ones(self.num_chains_pproc)

        # Initiative first batch of N samples (maybe taken from latin
        # hypercube/space-filling curve to fully explore parameter space -
        # not necessarily random). Call these Samples_old.
        (samples_old, data_old) = super(sampler, self).random_samples(
            initial_sample_type, param_min, param_max, savefile,
            self.num_chains, criterion)
        self.num_samples = self.chain_length * self.num_chains
        comm.Barrier()

        # now split it all up
        if comm.size > 1:
            # Floor division: the quotient is used as an array dimension.
            MYsamples_old = np.empty(
                (np.shape(samples_old)[0] // comm.size,
                 np.shape(samples_old)[1]))
            comm.Scatter([samples_old, MPI.DOUBLE],
                         [MYsamples_old, MPI.DOUBLE])
            MYdata_old = np.empty(
                (np.shape(data_old)[0] // comm.size,
                 np.shape(data_old)[1]))
            comm.Scatter([data_old, MPI.DOUBLE], [MYdata_old, MPI.DOUBLE])
        else:
            MYsamples_old = np.copy(samples_old)
            MYdata_old = np.copy(data_old)

        samples = MYsamples_old
        data = MYdata_old
        all_step_ratios = step_ratio
        (kern_old, proposal) = kern.delta_step(MYdata_old, None)
        start_ind = 1

    if hot_start:
        # LOAD FILES
        if hot_start == 1:  # HOT START FROM PARTIAL RUN
            if comm.rank == 0:
                print("HOT START from partial run")
            # Find and open save files
            save_dir = os.path.dirname(savefile)
            # The per-processor files are named after the file's basename
            # (see psavefile above); using the directory name here never
            # matched them.
            base_name = os.path.basename(savefile)
            mdat_files = glob.glob(
                os.path.join(save_dir, "proc*_{}".format(base_name)))
            if len(mdat_files) == 0:
                print("HOT START using serial file")
                mdat = sio.loadmat(savefile)
                samples = mdat['samples']
                data = mdat['data']
                kern_old = np.squeeze(mdat['kern_old'])
                all_step_ratios = np.squeeze(mdat['step_ratios'])
                chain_length = samples.shape[0] // self.num_chains
                if all_step_ratios.shape == (self.num_chains,
                                             chain_length):
                    print("Serial file, from completed run updating "
                          "hot_start")
                    hot_start = 2
                # reshape if parallel
                if comm.size > 1:
                    samples = np.reshape(
                        samples, (self.num_chains, chain_length, -1), 'F')
                    data = np.reshape(
                        data, (self.num_chains, chain_length, -1), 'F')
                    all_step_ratios = np.reshape(
                        all_step_ratios, (self.num_chains, -1), 'F')
            elif hot_start == 1 and len(mdat_files) == comm.size:
                print("HOT START using parallel files (same nproc)")
                # if the number of processors is the same then set mdat to
                # be the one with the matching processor number (doesn't
                # really matter)
                mdat = sio.loadmat(mdat_files[comm.rank])
                samples = mdat['samples']
                data = mdat['data']
                kern_old = np.squeeze(mdat['kern_old'])
                all_step_ratios = np.squeeze(mdat['step_ratios'])
            elif hot_start == 1 and len(mdat_files) != comm.size:
                print("HOT START using parallel files (diff nproc)")
                # Determine how many processors the previous data used
                # otherwise gather the data from mdat and then scatter
                # among the processors and update mdat
                mdat_files_local = comm.scatter(mdat_files)
                mdat_local = [sio.loadmat(m) for m in mdat_files_local]
                mdat_list = comm.allgather(mdat_local)
                mdat_global = []
                # instead of a list of lists, create a list of mdat
                for mlist in mdat_list:
                    mdat_global.extend(mlist)
                # get num_proc and num_chains_pproc for previous run
                # NOTE(review): len(mdat_list) is the length of the
                # allgather result, presumably the current number of ranks
                # rather than the previous run's -- confirm.
                old_num_proc = max((len(mdat_list), 1))
                old_num_chains_pproc = self.num_chains // old_num_proc
                # get batch size and/or number of dimensions
                chain_length = mdat_global[0]['samples'].shape[0] // \
                    old_num_chains_pproc
                # create lists of local data
                samples = []
                data = []
                all_step_ratios = []
                kern_old = []
                # RESHAPE old_num_chains_pproc, chain_length(or batch), dim
                for mdat in mdat_global:
                    samples.append(np.reshape(
                        mdat['samples'],
                        (old_num_chains_pproc, chain_length, -1), 'F'))
                    data.append(np.reshape(
                        mdat['data'],
                        (old_num_chains_pproc, chain_length, -1), 'F'))
                    all_step_ratios.append(np.reshape(
                        mdat['step_ratios'],
                        (old_num_chains_pproc, chain_length, -1), 'F'))
                    kern_old.append(np.reshape(
                        mdat['kern_old'], (old_num_chains_pproc,), 'F'))
                # turn into arrays
                samples = np.concatenate(samples)
                data = np.concatenate(data)
                all_step_ratios = np.concatenate(all_step_ratios)
                kern_old = np.concatenate(kern_old)

        if hot_start == 2:  # HOT START FROM COMPLETED RUN:
            if comm.rank == 0:
                print("HOT START from completed run")
            mdat = sio.loadmat(savefile)
            samples = mdat['samples']
            data = mdat['data']
            kern_old = np.squeeze(mdat['kern_old'])
            all_step_ratios = np.squeeze(mdat['step_ratios'])
            chain_length = samples.shape[0] // self.num_chains
            # No per-processor files are involved for a completed run.
            mdat_files = []
            # reshape if parallel
            if comm.size > 1:
                samples = np.reshape(
                    samples, (self.num_chains, chain_length, -1), 'F')
                data = np.reshape(
                    data, (self.num_chains, chain_length, -1), 'F')
                all_step_ratios = np.reshape(
                    all_step_ratios, (self.num_chains, chain_length), 'F')

        # SPLIT DATA IF NECESSARY
        if comm.size > 1 and (hot_start == 2 or (
                hot_start == 1 and len(mdat_files) != comm.size)):
            # Use split to split along num_chains
            samples = np.reshape(
                np.split(samples, comm.size, 0)[comm.rank],
                (self.num_chains_pproc * chain_length, -1), 'F')
            data = np.reshape(
                np.split(data, comm.size, 0)[comm.rank],
                (self.num_chains_pproc * chain_length, -1), 'F')
            all_step_ratios = np.reshape(
                np.split(all_step_ratios, comm.size, 0)[comm.rank],
                (self.num_chains_pproc * chain_length,), 'F')
            kern_old = np.reshape(
                np.split(kern_old, comm.size, 0)[comm.rank],
                (self.num_chains_pproc,), 'F')
        else:
            all_step_ratios = np.reshape(all_step_ratios, (-1,), 'F')

        # Set samples, data, all_step_ratios, mdat, step_ratio,
        # MYsamples_old, and kern_old accordingly
        step_ratio = all_step_ratios[-self.num_chains_pproc:]
        MYsamples_old = samples[-self.num_chains_pproc:, :]
        # Determine how many batches have been run
        start_ind = samples.shape[0] // self.num_chains_pproc

    mdat = dict()
    self.update_mdict(mdat)

    for batch in range(start_ind, self.chain_length):
        # For each of N samples_old, create N new parameter samples using
        # transition set and step_ratio. Call these samples samples_new.
        samples_new = t_set.step(step_ratio, param_width,
                                 param_left, param_right, MYsamples_old)

        # Solve the model for the samples_new.
        data_new = self.lb_model(samples_new)

        # Make some decision about changing step_size(k).  There are
        # multiple ways to do this.
        # Determine step size
        (kern_old, proposal) = kern.delta_step(data_new, kern_old)
        step_ratio = proposal * step_ratio
        # Is the ratio greater than max?
        step_ratio[step_ratio > max_ratio] = max_ratio
        # Is the ratio less than min?
        step_ratio[step_ratio < min_ratio] = min_ratio

        # Report progress roughly every quarter of the chain (rank 0 only);
        # chains shorter than 4 are skipped so the modulus is never zero.
        if self.chain_length < 4:
            pass
        elif comm.rank == 0 and (batch + 1) % (self.chain_length // 4) == 0:
            print("Current chain length: " +
                  str(batch + 1) + "/" + str(self.chain_length))

        # Save and export concatenated arrays
        samples = np.concatenate((samples, samples_new))
        data = np.concatenate((data, data_new))
        all_step_ratios = np.concatenate((all_step_ratios, step_ratio))
        mdat['step_ratios'] = all_step_ratios
        mdat['samples'] = samples
        mdat['data'] = data
        mdat['kern_old'] = kern_old
        if comm.size > 1:
            super(sampler, self).save(mdat, psavefile)
        else:
            super(sampler, self).save(mdat, savefile)
        MYsamples_old = samples_new

    # collect everything
    MYsamples = np.copy(samples)
    MYdata = np.copy(data)
    MYall_step_ratios = np.copy(all_step_ratios)
    # ``parameter_samples`` is np.ndarray of shape (num_samples, ndim)
    samples = util.get_global_values(
        MYsamples, shape=(self.num_samples, np.shape(MYsamples)[1]))
    # and ``data_samples`` is np.ndarray of shape (num_samples, mdim)
    data = util.get_global_values(
        MYdata, shape=(self.num_samples, np.shape(MYdata)[1]))
    # ``all_step_ratios`` is np.ndarray of shape (num_chains,
    # chain_length)
    all_step_ratios = util.get_global_values(
        MYall_step_ratios, shape=(self.num_samples,))
    all_step_ratios = np.reshape(
        all_step_ratios, (self.num_chains, self.chain_length), 'F')

    # save everything
    mdat['step_ratios'] = all_step_ratios
    mdat['samples'] = samples
    mdat['data'] = data
    mdat['kern_old'] = util.get_global_values(
        kern_old, shape=(self.num_chains,))
    super(sampler, self).save(mdat, savefile)

    return (samples, data, all_step_ratios)
def find_good_sets(input_set, good_sets_prev, unique_indices,
                   num_optsets_return, measskew_tol, measure):
    r"""

    .. todo::

        Use the idea we only know vectors are with 10% accuracy to guide
        inner_prod tol and skewness_tol.

    Extend good sets of size (n - 1) to good sets of size n.

    Every previous good set is extended by each unique QoI index larger
    than its largest member.  Each candidate is scored with
    ``calculate_avg_skewness`` (when ``measure is False``) or
    ``calculate_avg_measure`` (otherwise); candidates scoring below
    ``measskew_tol`` are kept as good sets and the lowest-scoring
    candidates are tracked as best sets.

    :param input_set: The input sample set.  Make sure the attribute
        _jacobians is not None.
    :type input_set: :class:`~bet.sample.sample_set`
    :param good_sets_prev: Good sets of QoIs of size n - 1.
    :type good_sets_prev: :class:`np.ndarray` of size (num_good_sets_prev,
        n - 1)
    :param unique_indices: Unique QoIs to consider.
    :type unique_indices: :class:`np.ndarray` of size (num_unique_qois, 1)
    :param int num_optsets_return: Number of best sets to return
    :param float measskew_tol: Throw out all sets of QoIs with average
        measure(skewness) number greater than this.
    :param boolean measure: If measure is True, use
        ``calculate_avg_measure`` to determine optimal QoIs, else use
        ``calculate_avg_skewness``

    :rtype: tuple
    :returns: (good_sets, best_sets, optsingvals_tensor) where good sets
        has size (num_good_sets, n), best sets has at most
        num_optsets_return rows of size n + 1 (rows with equal scores are
        merged) and optsingvals_tensor has size
        (num_centers, n, num_optsets_return).  good_sets and best_sets are
        merged over all processors; optsingvals_tensor is returned as
        computed on the calling processor (it is not gathered here).
    :raises ValueError: if ``input_set._jacobians`` is None.

    """
    if input_set._jacobians is None:
        raise ValueError("You must have jacobians to use this method.")

    num_centers = input_set._jacobians.shape[0]
    num_qois_return = good_sets_prev.shape[1] + 1
    comm.Barrier()

    # Best sets start with infinite scores so any candidate replaces them;
    # good_sets starts with a placeholder zero row that is stripped later.
    best_sets = np.zeros([num_optsets_return, num_qois_return + 1])
    best_sets[:, 0] = np.inf
    good_sets = np.zeros([1, num_qois_return])
    optsingvals_tensor = np.zeros([num_centers, num_qois_return,
                                   num_optsets_return])
    count_qois = 0

    # Walk over each good set of size (n - 1) and score its extensions
    for row in range(good_sets_prev.shape[0]):
        prev_set = good_sets_prev[row, :]
        min_ind = np.max(prev_set)

        if comm.rank != 0:
            qoi_combs = None
        else:
            # Candidate indices: unique QoIs not already in this set ...
            remaining = list(set(unique_indices) - set(prev_set))
            inds_notin_set = util.fix_dimensions_vector_2darray(remaining)
            # ... and larger than its maximum, so no set is repeated
            larger = inds_notin_set[inds_notin_set > min_ind]
            inds_notin_set = util.fix_dimensions_vector_2darray(larger)
            # One row per candidate: the previous set plus one new index
            repeated_prev = np.tile(prev_set, [inds_notin_set.shape[0], 1])
            extended = np.append(repeated_prev, inds_notin_set, axis=1)
            qoi_combs = util.fix_dimensions_vector_2darray(extended)
            qoi_combs = np.array_split(qoi_combs, comm.size)

        # Scatter them throughout the processors
        qoi_combs = comm.scatter(qoi_combs, root=0)

        for idx in range(len(qoi_combs)):
            candidate = qoi_combs[idx]
            count_qois += 1
            curr_set = util.fix_dimensions_vector_2darray(
                candidate).transpose()

            if measure is False:
                scored = calculate_avg_skewness(input_set, candidate)
            else:
                scored = calculate_avg_measure(input_set, candidate)
            (current_measskew, singvals) = scored

            # Below the tolerance: remember it as a good set
            if current_measskew < measskew_tol:
                good_sets = np.append(good_sets, curr_set, axis=0)

            # Better than the current worst best set: replace that row and
            # re-sort, permuting the stored singular values the same way
            if current_measskew < best_sets[-1, 0]:
                best_sets[-1, :] = np.append(
                    np.array([current_measskew]), candidate)
                order = best_sets[:, 0].argsort()
                best_sets = best_sets[order]

                optsingvals_tensor[:, :, -1] = singvals
                optsingvals_tensor = optsingvals_tensor[:, :, order]

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the good sets, best sets and counts from each processor
    good_sets = comm.gather(good_sets, root=0)
    best_sets = np.array(comm.gather(best_sets, root=0))
    count_qois = np.array(comm.gather(count_qois, root=0))

    if comm.rank == 0:
        # Organize the best sets: flatten over processors, keep one row
        # per distinct score, sort by score and truncate
        best_sets = best_sets.reshape(num_optsets_return * comm.size,
                                      num_qois_return + 1)
        _, uniq_inds_best = np.unique(best_sets[:, 0], return_index=True)
        best_sets = best_sets[uniq_inds_best, :]
        best_sets = best_sets[best_sets[:, 0].argsort()]
        best_sets = best_sets[:num_optsets_return, :]

        # Organize the good sets: drop each processor's placeholder row
        # and stack the rest under a single new placeholder row
        merged_good = np.zeros([1, num_qois_return])
        for per_proc in good_sets:
            merged_good = np.append(merged_good, per_proc[1:], axis=0)
        good_sets = merged_good

        set_size = good_sets.shape[1]
        logging.info('Possible sets of QoIs of size {} : {}'.format(
            set_size, np.sum(count_qois)))
        logging.info('Good sets of QoIs of size {} : {}'.format(
            set_size, good_sets.shape[0] - 1))

    comm.Barrier()
    best_sets = comm.bcast(best_sets, root=0)
    good_sets = comm.bcast(good_sets, root=0)

    # Strip the placeholder row before returning
    return (good_sets[1:].astype(int), best_sets, optsingvals_tensor)
def chooseOptQoIs_verbose(input_set, qoiIndices=None, num_qois_return=None,
                          num_optsets_return=None, inner_prod_tol=1.0,
                          measure=False, remove_zeros=True):
    r"""
    Brute-force search for the ``num_optsets_return`` best sets of QoIs.

    Given gradient vectors at some points (centers) in the parameter space,
    a set of QoIs to choose from, and the number of desired QoIs to return,
    every combination of ``num_qois_return`` QoIs is scored with
    ``calculate_avg_skewness`` (when ``measure`` is False) or
    ``calculate_avg_measure`` (otherwise), and the combinations with the
    smallest scores are kept.

    This method is brute force, i.e., if the method is given 10,000 QoIs and
    told to return the N best sets of 3, it will check all 10,000 choose 3
    possible sets.  See chooseOptQoIs_large for a less computationally
    expensive approach.

    The combinations are built on rank 0, scattered over the processors,
    scored locally, gathered back on rank 0, merged, and finally broadcast
    so that every rank returns the same result.

    :param input_set: The input sample set.  Make sure the attribute
        _jacobians is not None
    :type input_set: :class:`~bet.sample.sample_set`
    :param qoiIndices: Set of QoIs to consider.  Default is
        range(0, input_set._jacobians.shape[1])
    :type qoiIndices: :class:`np.ndarray` of size (1, num QoIs to consider)
    :param int num_qois_return: Number of desired QoIs to use in the
        inverse problem.  Default is input_dim
    :param int num_optsets_return: Number of best sets to return.
        Default is 10
    :param float inner_prod_tol: Tolerance forwarded to
        ``find_unique_vecs``.
    :param boolean measure: If measure is True, use
        ``calculate_avg_measure`` to determine optimal QoIs, else use
        ``calculate_avg_skewness``
    :param boolean remove_zeros: If True, ``find_unique_vecs`` will remove
        any QoIs that have a zero gradient

    :rtype: tuple
    :returns: (measure_skewness_indices_mat, optsingvals_tensor) where
        measure_skewness_indices_mat has shape
        (num_optsets_return, num_qois_return + 1) and optsingvals_tensor has
        shape (num_centers, num_qois_return, num_optsets_return).  Column 0
        of measure_skewness_indices_mat holds the score and the remaining
        columns hold the QoI indices; rows that were never filled keep an
        infinite score and zero indices.
    :raises ValueError: if ``input_set._jacobians`` is None.

    """
    G = input_set._jacobians
    if G is None:
        raise ValueError("You must have jacobians to use this method.")
    input_dim = input_set._dim

    num_centers = G.shape[0]
    if qoiIndices is None:
        qoiIndices = range(0, G.shape[1])
    if num_qois_return is None:
        num_qois_return = input_dim
    if num_optsets_return is None:
        num_optsets_return = 10

    # Remove QoIs that have zero gradients at any of the centers
    qoiIndices = find_unique_vecs(input_set, inner_prod_tol, qoiIndices,
                                  remove_zeros)

    # Find all possible combinations of QoIs (rank 0 only), one chunk per
    # processor
    if comm.rank == 0:
        qoi_combs = np.array(list(combinations(list(qoiIndices),
                                               num_qois_return)))
        logging.info('Possible sets of QoIs : {}'.format(qoi_combs.shape[0]))
        qoi_combs = np.array_split(qoi_combs, comm.size)
    else:
        qoi_combs = None

    # Scatter them throughout the processors
    qoi_combs = comm.scatter(qoi_combs, root=0)

    # For each combination, compute the score and keep the sets with the
    # smallest scores.  The last row/slice is always the current worst
    # entry, so a better candidate overwrites it and both containers are
    # re-sorted with the same permutation.
    measure_skewness_indices_mat = np.zeros([num_optsets_return,
                                             num_qois_return + 1])
    measure_skewness_indices_mat[:, 0] = np.inf
    optsingvals_tensor = np.zeros([num_centers, num_qois_return,
                                   num_optsets_return])
    for qoi_comb in qoi_combs:
        # ``== False`` (not ``not measure``) is kept on purpose so that the
        # branch taken for unusual flag values is unchanged.
        if measure == False:
            (current_measskew, singvals) = calculate_avg_skewness(input_set,
                                                                  qoi_comb)
        else:
            (current_measskew, singvals) = calculate_avg_measure(input_set,
                                                                 qoi_comb)

        if current_measskew < measure_skewness_indices_mat[-1, 0]:
            measure_skewness_indices_mat[-1, :] = np.append(
                np.array([current_measskew]), qoi_comb)
            order = measure_skewness_indices_mat[:, 0].argsort()
            measure_skewness_indices_mat = measure_skewness_indices_mat[order]

            optsingvals_tensor[:, :, -1] = singvals
            optsingvals_tensor = optsingvals_tensor[:, :, order]

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and scores from each processor
    gathered_scores = comm.gather(measure_skewness_indices_mat, root=0)
    gathered_singvals = comm.gather(optsingvals_tensor, root=0)

    # Find the num_optsets_return smallest scores from all processors
    if comm.rank == 0:
        # Stack the per-processor tables row-wise, and the singular values
        # along the "set" axis so that slice j of the tensor belongs to row
        # j of the table.  (Reshaping the stacked 4-D array directly would
        # scramble the entries when more than one processor is used.)
        measure_skewness_indices_mat = np.concatenate(gathered_scores,
                                                      axis=0)
        optsingvals_tensor = np.concatenate(gathered_singvals, axis=2)

        order = measure_skewness_indices_mat[:, 0].argsort()
        order = order[:num_optsets_return]
        measure_skewness_indices_mat = measure_skewness_indices_mat[order]
        optsingvals_tensor = optsingvals_tensor[:, :, order]

    measure_skewness_indices_mat = comm.bcast(measure_skewness_indices_mat,
                                              root=0)
    optsingvals_tensor = comm.bcast(optsingvals_tensor, root=0)

    return (measure_skewness_indices_mat, optsingvals_tensor)