Example 1
def verify_user_samples(model, sampler, samples, savefile, parallel):
    # evaluate the model at the samples directly
    data = model(samples)

    # evaluate the model at the samples via the sampler
    (my_samples, my_data) = sampler.user_samples(samples, savefile, parallel)

    if len(data.shape) == 1:
        data = np.expand_dims(data, axis=1)
    if len(samples.shape) == 1:
        samples = np.expand_dims(samples, axis=1)

    # compare the samples
    nptest.assert_array_equal(samples, my_samples)
    # compare the data
    nptest.assert_array_equal(data, my_data)
    # did num_samples get updated?
    assert samples.shape[0] == sampler.num_samples
    # did the file get correctly saved?

    if comm.rank == 0:
        mdat = sio.loadmat(savefile)
        nptest.assert_array_equal(samples, mdat['samples'])
        nptest.assert_array_equal(data, mdat['data'])
    comm.Barrier()
Example 2
def verify_random_samples(model, sampler, sample_type, param_min, param_max,
                          num_samples, savefile, parallel):
    # recreate the samples
    if num_samples is None:
        num_samples = sampler.num_samples
    param_left = np.repeat([param_min], num_samples, 0)
    param_right = np.repeat([param_max], num_samples, 0)
    samples = (param_right - param_left)
    if sample_type == "lhs":
        samples = samples * pyDOE.lhs(param_min.shape[-1], num_samples)
    elif sample_type == "random" or sample_type == "r":
        np.random.seed(1)
        samples = samples * np.random.random(param_left.shape)
    samples = samples + param_left
    # evaluate the model at the samples directly
    data = model(samples)

    # evaluate the model at the samples via the sampler
    # reset the random seed
    if sample_type == "random" or sample_type == "r":
        np.random.seed(1)
    (my_samples, my_data) = sampler.user_samples(samples, savefile, parallel)

    # make sure that the samples are within the boundaries
    assert np.all(my_samples <= param_right)
    assert np.all(my_samples >= param_left)

    if len(data.shape) == 1:
        data = np.expand_dims(data, axis=1)
    if len(samples.shape) == 1:
        samples = np.expand_dims(samples, axis=1)

    # compare the samples
    nptest.assert_array_equal(samples, my_samples)
    # compare the data
    nptest.assert_array_equal(data, my_data)
    # did num_samples get updated?
    assert samples.shape[0] == sampler.num_samples
    assert num_samples == sampler.num_samples
    # did the file get correctly saved?

    if comm.rank == 0:
        mdat = sio.loadmat(savefile)
        nptest.assert_array_equal(samples, mdat['samples'])
        nptest.assert_array_equal(data, mdat['data'])
    comm.Barrier()
Example 3
def find_good_sets(grad_tensor, good_sets_prev, unique_indices,
                   num_optsets_return, cond_tol, volume):
    r"""
    #TODO:  Use the idea that we only know the vectors to within 10% accuracy
        to guide inner_prod_tol and condnum_tol.
    Given gradient vectors at each center in the parameter space and given
    good sets of size n - 1, return good sets of size n.  That is, return
    sets of size n that have average condition number less than some tolerance.
    :param grad_tensor: Gradient vectors at each center in the parameter
        space :math:`\Lambda` for each QoI map.
    :type grad_tensor: :class:`np.ndarray` of shape (num_centers, num_qois,
        Lambda_dim) where num_centers is the number of points in
        :math:`\Lambda` at which we have approximated the gradient vectors,
        num_qois is the total number of possible QoIs to choose from, and
        Lambda_dim is the dimension of :math:`\Lambda`.
    :param good_sets_prev: Good sets of QoIs of size n - 1.
    :type good_sets_prev: :class:`np.ndarray` of size (num_good_sets_prev, n - 1)
    :param unique_indices: Unique QoIs to consider.
    :type unique_indices: :class:`np.ndarray` of size (num_unique_qois, 1)
    :param int num_optsets_return: Number of best sets to return
    :param float cond_tol: Throw out all sets of QoIs with average condition
        number greater than this.
    :param boolean volume: If volume is True, use ``calculate_avg_volume``
        to determine optimal QoIs
    :rtype: tuple
    :returns: (good_sets, best_sets, optsingvals_tensor) where good sets has
        size (num_good_sets, n), best sets has size (num_optsets_return,
        n + 1) and optsingvals_tensor has size (num_centers, n, Lambda_dim)
    """
    num_centers = grad_tensor.shape[0]
    Lambda_dim = grad_tensor.shape[2]
    num_qois_return = good_sets_prev.shape[1] + 1
    comm.Barrier()

    # Initialize best sets and set all condition numbers large
    best_sets = np.zeros([num_optsets_return, num_qois_return + 1])
    best_sets[:, 0] = np.inf
    good_sets = np.zeros([1, num_qois_return])
    count_qois = 0
    optsingvals_tensor = np.zeros(
        [num_centers, num_qois_return, num_optsets_return])

    # For each good set of size n - 1, find the possible sets of size n and
    # compute the average condition number of each
    count_qois = 0
    for i in range(good_sets_prev.shape[0]):
        min_ind = np.max(good_sets_prev[i, :])
        # Find all possible combinations of QoIs that include this set of n - 1
        if comm.rank == 0:
            inds_notin_set = util.fix_dimensions_vector_2darray(list(set(\
                unique_indices) - set(good_sets_prev[i, :])))

            # Choose only the QoI indices > min_ind so we do not repeat sets
            inds_notin_set = util.fix_dimensions_vector_2darray(inds_notin_set[\
                inds_notin_set > min_ind])
            qoi_combs = util.fix_dimensions_vector_2darray(np.append(np.tile(\
                good_sets_prev[i, :], [inds_notin_set.shape[0], 1]),
                inds_notin_set, axis=1))
            qoi_combs = np.array_split(qoi_combs, comm.size)
        else:
            qoi_combs = None

        # Scatter them throughout the processors
        qoi_combs = comm.scatter(qoi_combs, root=0)

        # For each combination, compute the average condition number and add the
        # set to good_sets if it is less than cond_tol
        for qoi_set in range(len(qoi_combs)):
            count_qois += 1
            curr_set = util.fix_dimensions_vector_2darray(qoi_combs[qoi_set])\
                .transpose()
            if not volume:
                (current_condnum,
                 singvals) = calculate_avg_condnum(grad_tensor,
                                                   qoi_combs[qoi_set])
            else:
                (current_condnum,
                 singvals) = calculate_avg_volume(grad_tensor,
                                                  qoi_combs[qoi_set])

            # If it's a good set, add it to good_sets
            if current_condnum < cond_tol:
                good_sets = np.append(good_sets, curr_set, axis=0)

                # If the average condition number is less than the max condition
                # number in our best_sets, add it to best_sets
                if current_condnum < best_sets[-1, 0]:
                    best_sets[-1, :] = np.append(np.array([current_condnum]),
                                                 qoi_combs[qoi_set])
                    order = best_sets[:, 0].argsort()
                    best_sets = best_sets[order]

                    # Store the corresponding singular values
                    optsingvals_tensor[:, :, -1] = singvals
                    optsingvals_tensor = optsingvals_tensor[:, :, order]

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and condition numbers from each processor
    good_sets = comm.gather(good_sets, root=0)
    best_sets = np.array(comm.gather(best_sets, root=0))
    count_qois = np.array(comm.gather(count_qois, root=0))

    # Find the num_optsets_return smallest condition numbers from all processors
    if comm.rank == 0:

        # Organize the best sets
        best_sets = best_sets.reshape(num_optsets_return * \
            comm.size, num_qois_return + 1)
        [temp, uniq_inds_best] = np.unique(best_sets[:, 0], return_index=True)
        best_sets = best_sets[uniq_inds_best, :]
        best_sets = best_sets[best_sets[:, 0].argsort()]
        best_sets = best_sets[:num_optsets_return, :]

        # Organize the good sets
        good_sets_new = np.zeros([1, num_qois_return])
        for each in good_sets:
            good_sets_new = np.append(good_sets_new, each[1:], axis=0)
        good_sets = good_sets_new

        print 'Possible sets of QoIs of size %i : '%good_sets.shape[1],\
            np.sum(count_qois)
        print 'Good sets of QoIs of size %i : '%good_sets.shape[1],\
            good_sets.shape[0] - 1

    comm.Barrier()
    best_sets = comm.bcast(best_sets, root=0)
    good_sets = comm.bcast(good_sets, root=0)

    return (good_sets[1:].astype(int), best_sets, optsingvals_tensor)
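A minimal call sketch for find_good_sets above. The shapes and tolerance are illustrative assumptions, and the call is expected to run inside the same module, where ``util``, ``calculate_avg_condnum``, and the MPI ``comm`` object are already in scope:

import numpy as np

# Hypothetical problem size: 10 centers, 20 candidate QoIs, 3-dimensional Lambda.
num_centers, num_qois, Lambda_dim = 10, 20, 3
grad_tensor = np.random.random((num_centers, num_qois, Lambda_dim))

# Start from all size-1 sets and consider every QoI index
# (in practice these would come from earlier steps of chooseOptQoIs_large).
good_sets_prev = np.arange(num_qois).reshape(-1, 1)
unique_indices = np.arange(num_qois)

(good_sets, best_sets, optsingvals_tensor) = find_good_sets(
    grad_tensor, good_sets_prev, unique_indices,
    num_optsets_return=10, cond_tol=100.0, volume=False)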
Example 4
def chooseOptQoIs_verbose(grad_tensor,
                          qoiIndices=None,
                          num_qois_return=None,
                          num_optsets_return=None,
                          inner_prod_tol=1.0,
                          volume=False,
                          remove_zeros=True):
    r"""
    Given gradient vectors at some points (centers) in the parameter space, a
    set of QoIs to choose from, and the number of desired QoIs to return, this
    method returns the ``num_optsets_return`` best sets of QoIs with
    respect to either the average condition number of the matrix formed by the
    gradient vectors of each QoI map, or the average volume of the inverse
    problem using this set of QoIs, computed as the product of the singular values
    of the same matrix.  This method is brute force, i.e., if the method is
    given 10,000 QoIs and told to return the N best sets of 3, it will check all
    10,000 choose 3 possible sets.  See chooseOptQoIs_large for a less
    computationally expensive approach.
    :param grad_tensor: Gradient vectors at each point of interest in the
        parameter space :math:`\Lambda` for each QoI map.
    :type grad_tensor: :class:`np.ndarray` of shape (num_centers, num_qois,
        Lambda_dim) where num_centers is the number of points in :math:`\Lambda`
        we have approximated the gradient vectors and num_qois is the total
        number of possible QoIs to choose from
    :param qoiIndices: Set of QoIs to consider from grad_tensor.  Default is
        range(0, grad_tensor.shape[1])
    :type qoiIndices: :class:`np.ndarray` of size (1, num QoIs to consider)
    :param int num_qois_return: Number of desired QoIs to use in the
        inverse problem.  Default is Lambda_dim
    :param int num_optsets_return: Number of best sets to return
        Default is 10
    :param boolean volume: If volume is True, use ``calculate_avg_volume``
        to determine optimal QoIs
    :param boolean remove_zeros: If True, ``find_unique_vecs`` will remove any
        QoIs that have a zero gradient vector at one or more points in
        :math:`\Lambda`.
    :rtype: tuple
    :returns: (condnum_indices_mat, optsingvals) where condnum_indices_mat has
        shape (num_optsets_return, num_qois_return+1) and optsingvals
        has shape (num_centers, num_qois_return, num_optsets_return)
    """
    num_centers = grad_tensor.shape[0]
    Lambda_dim = grad_tensor.shape[2]
    if qoiIndices is None:
        qoiIndices = range(0, grad_tensor.shape[1])
    if num_qois_return is None:
        num_qois_return = Lambda_dim
    if num_optsets_return is None:
        num_optsets_return = 10

    qoiIndices = find_unique_vecs(grad_tensor, inner_prod_tol, qoiIndices,
                                  remove_zeros)

    # Find all possible combinations of QoIs
    if comm.rank == 0:
        qoi_combs = np.array(
            list(combinations(list(qoiIndices), num_qois_return)))
        print 'Possible sets of QoIs : ', qoi_combs.shape[0]
        qoi_combs = np.array_split(qoi_combs, comm.size)
    else:
        qoi_combs = None

    # Scatter them throughout the processors
    qoi_combs = comm.scatter(qoi_combs, root=0)

    # For each combination, check the skewness and keep the sets
    # that have the best skewness, i.e., smallest condition number
    condnum_indices_mat = np.zeros([num_optsets_return, num_qois_return + 1])
    condnum_indices_mat[:, 0] = np.inf
    optsingvals_tensor = np.zeros(
        [num_centers, num_qois_return, num_optsets_return])
    for qoi_set in range(len(qoi_combs)):
        if not volume:
            (current_condnum,
             singvals) = calculate_avg_condnum(grad_tensor, qoi_combs[qoi_set])
        else:
            (current_condnum,
             singvals) = calculate_avg_volume(grad_tensor, qoi_combs[qoi_set])

        if current_condnum < condnum_indices_mat[-1, 0]:
            condnum_indices_mat[-1, :] = np.append(np.array([current_condnum]),
                                                   qoi_combs[qoi_set])
            order = condnum_indices_mat[:, 0].argsort()
            condnum_indices_mat = condnum_indices_mat[order]

            optsingvals_tensor[:, :, -1] = singvals
            optsingvals_tensor = optsingvals_tensor[:, :, order]

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and condition numbers from each processor
    condnum_indices_mat = np.array(comm.gather(condnum_indices_mat, root=0))
    optsingvals_tensor = np.array(comm.gather(optsingvals_tensor, root=0))

    # Find the num_optsets_return smallest condition numbers from all processors
    if comm.rank == 0:
        condnum_indices_mat = condnum_indices_mat.reshape(num_optsets_return * \
            comm.size, num_qois_return + 1)
        optsingvals_tensor = optsingvals_tensor.reshape(
            num_centers, num_qois_return, num_optsets_return * comm.size)
        order = condnum_indices_mat[:, 0].argsort()

        condnum_indices_mat = condnum_indices_mat[order]
        condnum_indices_mat = condnum_indices_mat[:num_optsets_return, :]

        optsingvals_tensor = optsingvals_tensor[:, :, order]
        optsingvals_tensor = optsingvals_tensor[:, :, :num_optsets_return]

    condnum_indices_mat = comm.bcast(condnum_indices_mat, root=0)
    optsingvals_tensor = comm.bcast(optsingvals_tensor, root=0)

    return (condnum_indices_mat, optsingvals_tensor)
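A hedged usage sketch for the brute-force search above; the gradient tensor is random here for illustration, whereas in practice it would be an approximation of the QoI gradients at the chosen centers:

import numpy as np

# Hypothetical gradient tensor of shape (num_centers, num_qois, Lambda_dim).
grad_tensor = np.random.random((10, 20, 3))

# Keep the 5 best sets of 3 QoIs, ranked by average condition number.
(condnum_indices_mat, optsingvals_tensor) = chooseOptQoIs_verbose(
    grad_tensor, num_qois_return=3, num_optsets_return=5)

# Row k of condnum_indices_mat is [avg_condnum, qoi_i, qoi_j, qoi_k],
# sorted from smallest (best) to largest average condition number.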
Example 5
def find_good_sets(input_set, good_sets_prev, unique_indices,
                   num_optsets_return, measskew_tol, measure):
    r"""

    .. todo::  Use the idea that we only know the vectors to within 10%
        accuracy to guide inner_prod_tol and skewness_tol.
    
    Given gradient vectors at each center in the parameter space and given
    good sets of size (n - 1), return good sets of size n.  That is, return
    sets of size n that have average measure(skewness) less than some tolerance.
    
    :param input_set: The input sample set.  Make sure the attribute _jacobians
        is not None.
    :type input_set: :class:`~bet.sample.sample_set`
    :param good_sets_prev: Good sets of QoIs of size n - 1.
    :type good_sets_prev: :class:`np.ndarray` of size (num_good_sets_prev, n -
        1) 
    :param unique_indices: Unique QoIs to consider.
    :type unique_indices: :class:`np.ndarray` of size (num_unique_qois, 1)
    :param int num_optsets_return: Number of best sets to return
    :param float measskew_tol: Throw out all sets of QoIs with average
        measure (skewness) greater than this.
    :param boolean measure: If measure is True, use ``calculate_avg_measure``
        to determine optimal QoIs, else use ``calculate_avg_skewness``
    
    :rtype: tuple
    :returns: (good_sets, best_sets, optsingvals_tensor) where good sets has
        size (num_good_sets, n), best sets has size (num_optsets_return,
        n + 1) and optsingvals_tensor has size (num_centers, n, input_dim)
    
    """

    if input_set._jacobians is None:
        raise ValueError("You must have jacobians to use this method.")

    num_centers = input_set._jacobians.shape[0]
    num_qois_return = good_sets_prev.shape[1] + 1
    comm.Barrier()

    # Initialize best sets and set all skewness values large
    best_sets = np.zeros([num_optsets_return, num_qois_return + 1])
    best_sets[:, 0] = np.inf
    good_sets = np.zeros([1, num_qois_return])
    count_qois = 0
    optsingvals_tensor = np.zeros(
        [num_centers, num_qois_return, num_optsets_return])

    # For each good set of size (n - 1), find the possible sets of size n and
    # compute the average skewness of each
    count_qois = 0
    for i in xrange(good_sets_prev.shape[0]):
        min_ind = np.max(good_sets_prev[i, :])
        # Find all possible combinations of QoIs that include this set of
        # (n - 1)
        if comm.rank == 0:
            inds_notin_set = util.fix_dimensions_vector_2darray(list(set(\
                unique_indices) - set(good_sets_prev[i, :])))

            # Choose only the QoI indices > min_ind so we do not repeat sets
            inds_notin_set = util.fix_dimensions_vector_2darray(inds_notin_set[\
                inds_notin_set > min_ind])
            qoi_combs = util.fix_dimensions_vector_2darray(np.append(np.tile(\
                good_sets_prev[i, :], [inds_notin_set.shape[0], 1]),
                inds_notin_set, axis=1))
            qoi_combs = np.array_split(qoi_combs, comm.size)
        else:
            qoi_combs = None

        # Scatter them throughout the processors
        qoi_combs = comm.scatter(qoi_combs, root=0)

        # For each combination, compute the average measure(skewness) and add
        # the set to good_sets if it is less than measskew_tol
        for qoi_set in xrange(len(qoi_combs)):
            count_qois += 1
            curr_set = util.fix_dimensions_vector_2darray(qoi_combs[qoi_set])\
                .transpose()
            if measure is False:
                (current_measskew,
                 singvals) = calculate_avg_skewness(input_set,
                                                    qoi_combs[qoi_set])
            else:
                (current_measskew,
                 singvals) = calculate_avg_measure(input_set,
                                                   qoi_combs[qoi_set])

            # If it's a good set, add it to good_sets
            if current_measskew < measskew_tol:
                good_sets = np.append(good_sets, curr_set, axis=0)

                # If the average skewness is less than the max skewness
                # in our best_sets, add it to best_sets
                if current_measskew < best_sets[-1, 0]:
                    best_sets[-1, :] = np.append(np.array([current_measskew]),
                                                 qoi_combs[qoi_set])
                    order = best_sets[:, 0].argsort()
                    best_sets = best_sets[order]

                    # Store the corresponding singular values
                    optsingvals_tensor[:, :, -1] = singvals
                    optsingvals_tensor = optsingvals_tensor[:, :, order]

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and skewness values from each processor
    good_sets = comm.gather(good_sets, root=0)
    best_sets = np.array(comm.gather(best_sets, root=0))
    count_qois = np.array(comm.gather(count_qois, root=0))

    # Find the num_optsets_return smallest skewness from all processors
    if comm.rank == 0:

        # Organize the best sets
        best_sets = best_sets.reshape(num_optsets_return * \
            comm.size, num_qois_return + 1)
        [_, uniq_inds_best] = np.unique(best_sets[:, 0], return_index=True)
        best_sets = best_sets[uniq_inds_best, :]
        best_sets = best_sets[best_sets[:, 0].argsort()]
        best_sets = best_sets[:num_optsets_return, :]

        # Organize the good sets
        good_sets_new = np.zeros([1, num_qois_return])
        for each in good_sets:
            good_sets_new = np.append(good_sets_new, each[1:], axis=0)
        good_sets = good_sets_new

        logging.info('Possible sets of QoIs of size {} : {}'.format(\
                good_sets.shape[1], np.sum(count_qois)))
        logging.info('Good sets of QoIs of size {} : {}'.format(\
                good_sets.shape[1], good_sets.shape[0] - 1))

    comm.Barrier()
    best_sets = comm.bcast(best_sets, root=0)
    good_sets = comm.bcast(good_sets, root=0)

    return (good_sets[1:].astype(int), best_sets, optsingvals_tensor)
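A minimal sketch of calling this sample_set-based version. Attaching the jacobians through the ``_jacobians`` attribute mirrors exactly what the function checks; the constructor argument and shapes are illustrative assumptions:

import numpy as np
import bet.sample as sample

input_set = sample.sample_set(3)                      # 3-dimensional input space
input_set._jacobians = np.random.random((10, 20, 3))  # (num_centers, num_qois, input_dim)

good_sets_prev = np.arange(20).reshape(-1, 1)         # all candidate sets of size n - 1 = 1
unique_indices = np.arange(20)

(good_sets, best_sets, optsingvals_tensor) = find_good_sets(
    input_set, good_sets_prev, unique_indices,
    num_optsets_return=10, measskew_tol=3.0, measure=False)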
Example 6
def chooseOptQoIs_verbose(input_set,
                          qoiIndices=None,
                          num_qois_return=None,
                          num_optsets_return=None,
                          inner_prod_tol=1.0,
                          measure=False,
                          remove_zeros=True):
    r"""
    Given gradient vectors at some points (centers) in the parameter space, a
    set of QoIs to choose from, and the number of desired QoIs to return, this
    method returns the ``num_optsets_return`` best sets of QoIs with
    respect to either the average measure of the matrix formed by the
    gradient vectors of each QoI map, or the average skewness of the inverse
    image of this set of QoIs, computed as the product of the singular values
    of the same matrix.  This method is brute force, i.e., if the method is
    given 10,000 QoIs and told to return the N best sets of 3, it will check all
    10,000 choose 3 possible sets.  See chooseOptQoIs_large for a less
    computationally expensive approach.
    
    :param input_set: The input sample set.  Make sure the attribute _jacobians
        is not None
    :type input_set: :class:`~bet.sample.sample_set`
    :param qoiIndices: Set of QoIs to consider.  Default is
        xrange(0, input_set._jacobians.shape[1])
    :type qoiIndices: :class:`np.ndarray` of size (1, num QoIs to consider)
    :param int num_qois_return: Number of desired QoIs to use in the
        inverse problem.  Default is input_dim
    :param int num_optsets_return: Number of best sets to return
        Default is 10
    :param boolean measure: If measure is True, use ``calculate_avg_measure``
        to determine optimal QoIs, else use ``calculate_avg_skewness``
    :param boolean remove_zeros: If True, ``find_unique_vecs`` will remove any
        QoIs that have a zero gradient
    
    :rtype: `np.ndarray` of shape (num_optsets_returned, num_qois_returned + 1)
    :returns: measure_skewness_indices_mat
    
    """

    G = input_set._jacobians
    if G is None:
        raise ValueError("You must have jacobians to use this method.")
    input_dim = input_set._dim
    num_centers = G.shape[0]

    if qoiIndices is None:
        qoiIndices = xrange(0, G.shape[1])
    if num_qois_return is None:
        num_qois_return = input_dim
    if num_optsets_return is None:
        num_optsets_return = 10

    # Remove QoIs that have zero gradients at any of the centers
    qoiIndices = find_unique_vecs(input_set, inner_prod_tol, qoiIndices,
                                  remove_zeros)

    # Find all possible combinations of QoIs
    if comm.rank == 0:
        qoi_combs = np.array(
            list(combinations(list(qoiIndices), num_qois_return)))
        logging.info('Possible sets of QoIs : {}'.format(qoi_combs.shape[0]))
        qoi_combs = np.array_split(qoi_combs, comm.size)
    else:
        qoi_combs = None

    # Scatter them throughout the processors
    qoi_combs = comm.scatter(qoi_combs, root=0)

    # For each combination, check the skewness and keep the sets
    # that have the smallest skewness
    measure_skewness_indices_mat = np.zeros(
        [num_optsets_return, num_qois_return + 1])
    measure_skewness_indices_mat[:, 0] = np.inf
    optsingvals_tensor = np.zeros(
        [num_centers, num_qois_return, num_optsets_return])
    for qoi_set in xrange(len(qoi_combs)):
        if not measure:
            (current_measskew,
             singvals) = calculate_avg_skewness(input_set, qoi_combs[qoi_set])
        else:
            (current_measskew,
             singvals) = calculate_avg_measure(input_set, qoi_combs[qoi_set])

        if current_measskew < measure_skewness_indices_mat[-1, 0]:
            measure_skewness_indices_mat[-1, :] = np.append(np.array(\
                    [current_measskew]), qoi_combs[qoi_set])
            order = measure_skewness_indices_mat[:, 0].argsort()
            measure_skewness_indices_mat = measure_skewness_indices_mat[order]

            optsingvals_tensor[:, :, -1] = singvals
            optsingvals_tensor = optsingvals_tensor[:, :, order]

    # Wait for all processes to get to this point
    comm.Barrier()

    # Gather the best sets and skewness values from each processor
    measure_skewness_indices_mat = np.array(comm.gather(\
            measure_skewness_indices_mat, root=0))
    optsingvals_tensor = np.array(comm.gather(optsingvals_tensor, root=0))

    # Find the num_optsets_return smallest skewness values from all processors
    if comm.rank == 0:
        measure_skewness_indices_mat = measure_skewness_indices_mat.reshape(\
                num_optsets_return * comm.size, num_qois_return + 1)
        optsingvals_tensor = optsingvals_tensor.reshape(
            num_centers, num_qois_return, num_optsets_return * comm.size)
        order = measure_skewness_indices_mat[:, 0].argsort()

        measure_skewness_indices_mat = measure_skewness_indices_mat[order]
        measure_skewness_indices_mat = measure_skewness_indices_mat[\
                :num_optsets_return, :]

        optsingvals_tensor = optsingvals_tensor[:, :, order]
        optsingvals_tensor = optsingvals_tensor[:, :, :num_optsets_return]

    measure_skewness_indices_mat = comm.bcast(measure_skewness_indices_mat,
                                              root=0)
    optsingvals_tensor = comm.bcast(optsingvals_tensor, root=0)

    return (measure_skewness_indices_mat, optsingvals_tensor)
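A corresponding hedged sketch for this version; again the jacobians are attached directly to the attribute the method reads, and the shapes are assumptions:

import numpy as np
import bet.sample as sample

input_set = sample.sample_set(3)
input_set._jacobians = np.random.random((10, 20, 3))

# Search all (20 choose 3) candidate sets and keep the 5 with the smallest
# average skewness (measure=False).
(measure_skewness_indices_mat, optsingvals_tensor) = chooseOptQoIs_verbose(
    input_set, num_qois_return=3, num_optsets_return=5, measure=False)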
Example 7
    def generalized_chains(self,
                           param_min,
                           param_max,
                           t_set,
                           kern,
                           savefile,
                           initial_sample_type="lhs",
                           criterion='center'):
        """
        Basic adaptive sampling algorithm using generalized chains.
       
        :param string initial_sample_type: type of initial sample: random (or
            "r"), latin hypercube ("lhs"), or space-filling curve (TBD)
        :param param_min: minimum value for each parameter dimension
        :type param_min: :class:`numpy.ndarray` (ndim,)
        :param param_max: maximum value for each parameter dimension
        :type param_max: :class:`numpy.ndarray` (ndim,)
        :param t_set: method for creating new parameter steps of a given step
            size based on the parameter domain size
        :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set`
        :param kern: functional that acts on the data and is used to
            determine the proposed change to the ``step_size``
        :type kern: :class:`~bet.sampling.adaptiveSampling.kernel` object.
        :param string savefile: filename to save samples and data
        :param string criterion: latin hypercube criterion see 
            `PyDOE <http://pythonhosted.org/pyDOE/randomized.html>`_
        :rtype: tuple
        :returns: (``parameter_samples``, ``data_samples``, ``all_step_ratios``) where
            ``parameter_samples`` is np.ndarray of shape (num_samples, ndim),
            ``data_samples`` is np.ndarray of shape (num_samples, mdim), and 
            ``all_step_ratios`` is np.ndarray of shape (num_chains,
            chain_length)
        """
        if comm.size > 1:
            psavefile = os.path.join(
                os.path.dirname(savefile),
                "proc{}{}".format(comm.rank, os.path.basename(savefile)))

        # Initialize Nx1 vector Step_size = something reasonable (based on size
        # of domain and transition set type)
        # Calculate domain size
        param_left = np.repeat([param_min], self.num_chains_pproc, 0)
        param_right = np.repeat([param_max], self.num_chains_pproc, 0)

        param_width = param_right - param_left
        # Calculate step_size
        max_ratio = t_set.max_ratio
        min_ratio = t_set.min_ratio
        step_ratio = t_set.init_ratio * np.ones(self.num_chains_pproc)

        # Initialize the first batch of N samples (maybe taken from a latin
        # hypercube/space-filling curve to fully explore parameter space - not
        # necessarily random). Call these Samples_old.
        (samples_old,
         data_old) = super(sampler,
                           self).random_samples(initial_sample_type, param_min,
                                                param_max, savefile,
                                                self.num_chains, criterion)
        self.num_samples = self.chain_length * self.num_chains
        comm.Barrier()

        # now split it all up
        if comm.size > 1:
            MYsamples_old = np.empty((np.shape(samples_old)[0] / comm.size,
                                      np.shape(samples_old)[1]))
            comm.Scatter([samples_old, MPI.DOUBLE],
                         [MYsamples_old, MPI.DOUBLE])
            MYdata_old = np.empty(
                (np.shape(data_old)[0] / comm.size, np.shape(data_old)[1]))
            comm.Scatter([data_old, MPI.DOUBLE], [MYdata_old, MPI.DOUBLE])
        else:
            MYsamples_old = np.copy(samples_old)
            MYdata_old = np.copy(data_old)

        samples = MYsamples_old
        data = MYdata_old
        all_step_ratios = step_ratio
        (kern_old, proposal) = kern.delta_step(MYdata_old, None)
        mdat = dict()
        self.update_mdict(mdat)

        for batch in xrange(1, self.chain_length):
            # For each of N samples_old, create N new parameter samples using
            # transition set and step_ratio. Call these samples samples_new.
            samples_new = t_set.step(step_ratio, param_width, param_left,
                                     param_right, MYsamples_old)

            # Solve the model for the samples_new.
            data_new = self.lb_model(samples_new)

            # Make some decision about changing step_size(k).  There are
            # multiple ways to do this.
            # Determine step size
            (kern_old, proposal) = kern.delta_step(data_new, kern_old)
            step_ratio = proposal * step_ratio
            # Is the ratio greater than max?
            step_ratio[step_ratio > max_ratio] = max_ratio
            # Is the ratio less than min?
            step_ratio[step_ratio < min_ratio] = min_ratio

            # Save and export concatenated arrays
            if self.chain_length < 4:
                pass
            elif (batch + 1) % (self.chain_length / 4) == 0:
                print "Current chain length: " + str(batch + 1) + "/" + str(
                    self.chain_length)
            samples = np.concatenate((samples, samples_new))
            data = np.concatenate((data, data_new))
            all_step_ratios = np.concatenate((all_step_ratios, step_ratio))
            mdat['step_ratios'] = all_step_ratios
            mdat['samples'] = samples
            mdat['data'] = data
            if comm.size > 1:
                super(sampler, self).save(mdat, psavefile)
            else:
                super(sampler, self).save(mdat, savefile)
            MYsamples_old = samples_new

        # collect everything
        MYsamples = np.copy(samples)
        MYdata = np.copy(data)
        MYall_step_ratios = np.copy(all_step_ratios)
        # ``parameter_samples`` is np.ndarray of shape (num_samples, ndim)
        samples = util.get_global_values(MYsamples,
                                         shape=(self.num_samples,
                                                np.shape(MYsamples)[1]))
        # and ``data_samples`` is np.ndarray of shape (num_samples, mdim)
        data = util.get_global_values(MYdata,
                                      shape=(self.num_samples,
                                             np.shape(MYdata)[1]))
        # ``all_step_ratios`` is np.ndarray of shape (num_chains,
        # chain_length)
        all_step_ratios = util.get_global_values(MYall_step_ratios,
                                                 shape=(self.num_samples, ))
        all_step_ratios = np.reshape(all_step_ratios,
                                     (self.num_chains, self.chain_length))

        # save everything
        mdat['step_ratios'] = all_step_ratios
        mdat['samples'] = samples
        mdat['data'] = data
        super(sampler, self).save(mdat, savefile)

        return (samples, data, all_step_ratios)
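A hedged call sketch for the method above. The ``my_sampler`` object and the ``transition_set`` argument order are assumptions; the kernel stub only mirrors the ``delta_step`` contract that ``generalized_chains`` actually uses:

import numpy as np
import bet.sampling.adaptiveSampling as asam

class constant_kernel(object):
    # Minimal kernel stub: delta_step(data, kern_old) -> (kern_new, proposal).
    def delta_step(self, data, kern_old=None):
        # Keep one kernel value per chain and always propose a ratio of 1;
        # real kernels adapt the proposal based on the data.
        return (np.ones(data.shape[0]), np.ones(data.shape[0]))

param_min = np.zeros(3)
param_max = np.ones(3)

# Assumed argument order (init_ratio, min_ratio, max_ratio), based on the
# attributes generalized_chains reads from t_set.
t_set = asam.transition_set(0.5, 0.5 ** 5, 1.0)
kern = constant_kernel()

# my_sampler is assumed to be an adaptiveSampling.sampler already configured
# with a model, num_samples, num_chains, and chain_length.
(samples, data, all_step_ratios) = my_sampler.generalized_chains(
    param_min, param_max, t_set, kern, "adaptive_run.mat",
    initial_sample_type="lhs")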
Example 8
    def generalized_chains(self,
                           input_obj,
                           t_set,
                           kern,
                           savefile,
                           initial_sample_type="random",
                           criterion='center',
                           hot_start=0):
        """
        Basic adaptive sampling algorithm using generalized chains.

        .. todo::

            Test HOTSTART from parallel files using different num proc

        :param string initial_sample_type: type of initial sample: random (or
            "r"), latin hypercube ("lhs"), or space-filling curve (TBD)
        :param input_obj: Either a :class:`bet.sample.sample_set` object for an
            input space, an array of min and max bounds for the input values
            with ``min = input_domain[:, 0]`` and ``max = input_domain[:, 1]``,
            or the dimension of an input space
        :type input_obj: :class:`~bet.sample.sample_set`,
            :class:`numpy.ndarray` of shape (ndim, 2), or :class: `int`
        :param t_set: method for creating new parameter steps of a given step
            size based on the parameter domain size
        :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set`
        :param kern: functional that acts on the data and is used to
            determine the proposed change to the ``step_size``
        :type kern: :class:`~bet.sampling.adaptiveSampling.kernel` object.
        :param string savefile: filename to save samples and data
        :param int hot_start: Flag for whether or not to hot start the
            sampling chains from a previous set of chains. Note that
            ``num_chains`` must
            be the same, but ``num_chains_pproc`` need not be the same. 0 -
            cold start, 1 - hot start from uncompleted run, 2 - hot
            start from finished run
        :param string criterion: latin hypercube criterion see 
            `PyDOE <http://pythonhosted.org/pyDOE/randomized.html>`_
        
        :rtype: tuple
        :returns: (``discretization``, ``all_step_ratios``) where
            ``discretization`` is a :class:`~bet.sample.discretization` object
            containing ``num_samples``  and  ``all_step_ratios`` is np.ndarray
            of shape ``(num_chains, chain_length)``
        
        """

        # Calculate step_size
        max_ratio = t_set.max_ratio
        min_ratio = t_set.min_ratio

        if not hot_start:
            logging.info("COLD START")
            step_ratio = t_set.init_ratio * np.ones(self.num_chains_pproc)

            # Initialize the first batch of N samples (maybe taken from a latin
            # hypercube/space-filling curve to fully explore parameter space -
            # not necessarily random). Call these Samples_old.
            disc_old = super(sampler, self).create_random_discretization(
                initial_sample_type,
                input_obj,
                savefile,
                self.num_chains,
                criterion,
                globalize=False)
            self.num_samples = self.chain_length * self.num_chains
            comm.Barrier()

            # populate local values
            #disc_old._input_sample_set.global_to_local()
            #disc_old._output_sample_set.global_to_local()
            input_old = disc_old._input_sample_set.copy()

            disc = disc_old.copy()
            all_step_ratios = step_ratio

            (kern_old, proposal) = kern.delta_step(disc_old.\
                    _output_sample_set.get_values_local(), None)

            start_ind = 1

        if hot_start:
            # LOAD FILES
            _, disc, all_step_ratios, kern_old = loadmat(
                savefile,
                lb_model=None,
                hot_start=hot_start,
                num_chains=self.num_chains)
            # Make sure arrays are localized; from here on out we will only
            # operate on _local_values
            # Set mdat, step_ratio, input_old, start_ind appropriately
            step_ratio = all_step_ratios[-self.num_chains_pproc:]
            input_old = sample.sample_set(disc._input_sample_set.get_dim())
            input_old.set_domain(disc._input_sample_set.get_domain())
            input_old.set_values_local(disc._input_sample_set.\
                    get_values_local()[-self.num_chains_pproc:, :])

            # Determine how many batches have been run
            start_ind = disc._input_sample_set.get_values_local().\
                    shape[0]/self.num_chains_pproc

        mdat = dict()
        self.update_mdict(mdat)
        input_old.update_bounds_local()

        for batch in xrange(start_ind, self.chain_length):
            # For each of N samples_old, create N new parameter samples using
            # transition set and step_ratio. Call these samples input_new.
            input_new = t_set.step(step_ratio, input_old)

            # Solve the model for the input_new.
            output_new_values = self.lb_model(input_new.get_values_local())

            # Make some decision about changing step_size(k).  There are
            # multiple ways to do this.
            # Determine step size
            (kern_old, proposal) = kern.delta_step(output_new_values, kern_old)
            step_ratio = proposal * step_ratio
            # Is the ratio greater than max?
            step_ratio[step_ratio > max_ratio] = max_ratio
            # Is the ratio less than min?
            step_ratio[step_ratio < min_ratio] = min_ratio

            # Save and export concatenated arrays
            if self.chain_length < 4:
                pass
            elif comm.rank == 0 and (batch + 1) % (self.chain_length / 4) == 0:
                logging.info("Current chain length: "+\
                            str(batch+1)+"/"+str(self.chain_length))
            disc._input_sample_set.append_values_local(input_new.\
                    get_values_local())
            disc._output_sample_set.append_values_local(output_new_values)
            all_step_ratios = np.concatenate((all_step_ratios, step_ratio))
            mdat['step_ratios'] = all_step_ratios
            mdat['kern_old'] = kern_old

            super(sampler, self).save(mdat, savefile, disc, globalize=False)
            input_old = input_new

        # collect everything
        disc._input_sample_set.update_bounds_local()
        #disc._input_sample_set.local_to_global()
        #disc._output_sample_set.local_to_global()

        MYall_step_ratios = np.copy(all_step_ratios)
        # ``all_step_ratios`` is np.ndarray of shape (num_chains,
        # chain_length)
        all_step_ratios = util.get_global_values(MYall_step_ratios,
                                                 shape=(self.num_samples, ))
        all_step_ratios = np.reshape(all_step_ratios,
                                     (self.num_chains, self.chain_length), 'F')

        # save everything
        mdat['step_ratios'] = all_step_ratios
        mdat['kern_old'] = util.get_global_values(kern_old,
                                                  shape=(self.num_chains, ))
        super(sampler, self).save(mdat, savefile, disc, globalize=True)

        return (disc, all_step_ratios)
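A similar hedged sketch for this discretization-based version: the input domain is passed as an (ndim, 2) array of bounds, per the docstring, and ``hot_start=0`` requests a cold start. The sampler object and ``transition_set`` arguments are assumptions, as above:

import numpy as np
import bet.sampling.adaptiveSampling as asam

class constant_kernel(object):
    # Minimal kernel stub matching the delta_step(data, kern_old) contract.
    def delta_step(self, data, kern_old=None):
        return (np.ones(data.shape[0]), np.ones(data.shape[0]))

# Input domain given as min/max bounds for each of 3 dimensions.
input_domain = np.array([[0.0, 1.0]] * 3)

t_set = asam.transition_set(0.5, 0.5 ** 5, 1.0)
kern = constant_kernel()

# my_sampler is again assumed to be a configured adaptiveSampling.sampler.
(disc, all_step_ratios) = my_sampler.generalized_chains(
    input_domain, t_set, kern, "adaptive_run.mat",
    initial_sample_type="random", hot_start=0)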