Example #1
    def forward(self, X):
        """Map full variables to active variables.

        Map the points in the original input space to the active and inactive
        variables.

        Parameters
        ----------
        X : ndarray
            an M-by-m matrix, each row of `X` is a point in the original
            parameter space

        Returns
        -------
        Y : ndarray
            M-by-n matrix that contains points in the space of active variables.
            Each row of `Y` corresponds to a row of `X`.
        Z : ndarray
            M-by-(m-n) matrix that contains points in the space of inactive
            variables. Each row of `Z` corresponds to a row of `X`.

        """
        X = process_inputs(X)[0]
        W1, W2 = self.domain.subspaces.W1, self.domain.subspaces.W2
        Y, Z = np.dot(X, W1), np.dot(X, W2)
        return Y, Z
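For readers without the surrounding class, the linear algebra here reduces to splitting an orthogonal eigenvector matrix into its active and inactive column blocks. A minimal standalone sketch, in which the random `W` below stands in for the library's computed eigenvectors:

import numpy as np

M, m, n = 5, 4, 2
rng = np.random.default_rng(0)

# a random orthogonal matrix; its first n columns play the role of W1
W, _ = np.linalg.qr(rng.standard_normal((m, m)))
W1, W2 = W[:, :n], W[:, n:]

X = rng.standard_normal((M, m))
Y, Z = np.dot(X, W1), np.dot(X, W2)   # active and inactive coordinates
print(Y.shape, Z.shape)               # (5, 2) (5, 2)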
Example #2
    def forward(self, X):
        """Map full variables to active variables.
        
        Map the points in the original input space to the active and inactive
        variables.

        Parameters
        ----------
        X : ndarray
            an M-by-m matrix, each row of `X` is a point in the original 
            parameter space

        Returns
        -------
        Y : ndarray 
            M-by-n matrix that contains points in the space of active variables.
            Each row of `Y` corresponds to a row of `X`.
        Z : ndarray 
            M-by-(m-n) matrix that contains points in the space of inactive 
            variables. Each row of `Z` corresponds to a row of `X`.

        """
        X = process_inputs(X)[0]
        W1, W2 = self.domain.subspaces.W1, self.domain.subspaces.W2
        Y, Z = np.dot(X, W1), np.dot(X, W2)
        return Y, Z
Example #3
    def inverse(self, Y, N=1):
        """
        Map the points in the active variable space to the original parameter
        space.

        :param ndarray Y: M-by-n matrix that contains points in the space
            of active variables.
        :param int N: The number of points in the original parameter space
            that are returned that map to the given active variables.

        :return: X, (M*N)-by-m matrix that contains points in the original
            parameter space.
        :rtype: ndarray

        :return: ind, (M*N)-by-1 matrix that contains integer indices. These
            indices identify which rows of `X` map to which rows of `Y`.
        :rtype: ndarray

        **Notes**

        The inverse map depends critically on the `regularize_z` function.
        """
        # check inputs
        Y, NY, n = process_inputs(Y)

        if not isinstance(N, int):
            raise TypeError('N must be an int')

        logging.getLogger(__name__).debug('Inverting {:d} y\'s with {:d} z\'s per y.'.format(NY, N))

        Z = self.regularize_z(Y, N)
        W = self.domain.subspaces.eigenvectors
        X, ind = _rotate_x(Y, Z, W)
        return X, ind
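The rotation back to the full space is not shown in this snippet. A sketch of what a helper like `_rotate_x` plausibly does, assuming it stacks the active and inactive coordinates and applies the transpose of the orthogonal eigenvector matrix (`_rotate_x_sketch` is a hypothetical stand-in):

import numpy as np

def _rotate_x_sketch(Y, Z, W):
    # X = [Y Z] W^T recovers full-space points from active/inactive coords
    return np.dot(np.hstack((Y, Z)), W.T)

rng = np.random.default_rng(1)
m, n, M = 4, 2, 3
W, _ = np.linalg.qr(rng.standard_normal((m, m)))
X = rng.standard_normal((M, m))
Y, Z = np.dot(X, W[:, :n]), np.dot(X, W[:, n:])
print(np.allclose(_rotate_x_sketch(Y, Z, W), X))  # True for orthogonal W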
Example #4
def spectral_decomposition(df):
    """
    Use the SVD to compute the eigenvectors and eigenvalues for the
    active subspace analysis.

    :param ndarray df: ndarray of size M-by-m that contains evaluations of the gradient.

    :return: [e, W], [ eigenvalues, eigenvectors ]
    :rtype: [ndarray, ndarray]

    **Notes**

    If the number M of gradient samples is less than the dimension m of the
    inputs, then the method builds an arbitrary basis for the nullspace, which
    corresponds to the inactive subspace.
    """
    # set integers
    df, M, m = process_inputs(df)

    # compute active subspace
    if M >= m:
        U, sig, W = np.linalg.svd(df, full_matrices=False)
    else:
        U, sig, W = np.linalg.svd(df, full_matrices=True)
        sig = np.hstack((np.array(sig), np.zeros(m-M)))
    e = (sig**2) / M
    W = W.T
    W = W*np.sign(W[0,:])
    return e.reshape((m,1)), W.reshape((m,m))
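A quick numerical check of what the returned quantities mean: the entries of `e` are the eigenvalues of the sample matrix C = df' * df / M, because the squared singular values of `df` equal the eigenvalues of df' * df. This sketch is independent of the library:

import numpy as np

rng = np.random.default_rng(2)
M, m = 50, 4
df = rng.standard_normal((M, m))

_, sig, _ = np.linalg.svd(df, full_matrices=False)
e_svd = (sig**2) / M

C = np.dot(df.T, df) / M
e_eig = np.sort(np.linalg.eigvalsh(C))[::-1]   # descending order
print(np.allclose(e_svd, e_eig))               # True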
Example #5
    def compute(self, df, n_boot=200):
        """
        Compute the active and inactive subspaces from a collection of
        sampled gradients.

        :param ndarray df: an ndarray of size M-by-m that contains evaluations of the gradient.
        :param int n_boot: number of bootstrap replicates to use when computing bootstrap ranges.

        **Notes**

        This method sets the class's attributes `W1`, `W2`, `eigenvalues`, and
        `eigenvectors`. If `n_boot` is greater than zero, then this method
        also runs a bootstrap to compute and set `e_br` and `sub_br`.
        """
        df, M, m = process_inputs(df)

        if not isinstance(n_boot, int):
            raise TypeError('n_boot must be an integer.')

        # compute eigenvalues and eigenvecs
        logging.getLogger('PAUL').info('Computing spectral decomp with {:d} samples in {:d} dims.'.format(M, m))
        evals, evecs = spectral_decomposition(df)
        self.eigenvalues, self.eigenvectors = evals, evecs

        # compute bootstrap ranges for eigenvalues and subspace distances
        if n_boot > 0:
            logging.getLogger('PAUL').info('Bootstrapping {:d} spectral decomps of size {:d} by {:d}.'.format(n_boot, M, m))
            e_br, sub_br = bootstrap_ranges(df, evals, evecs, n_boot=n_boot)
            self.e_br, self.sub_br = e_br, sub_br

        # partition the subspaces with a crappy heuristic
        n = compute_partition(evals)
        self.partition(n)
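`compute_partition` is not shown in this snippet. A common heuristic, and the default described in later examples as 'EVG', is to split at the largest gap in the eigenvalue spectrum. A minimal sketch of that idea, assuming strictly positive eigenvalues sorted in descending order:

import numpy as np

def largest_gap_partition(evals):
    # index of the largest gap between consecutive log-eigenvalues,
    # interpreted as the dimension of the active subspace
    e = np.log(evals.flatten())
    return int(np.argmax(e[:-1] - e[1:])) + 1

evals = np.array([10.0, 8.0, 0.1, 0.05]).reshape((4, 1))
print(largest_gap_partition(evals))  # 2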
Example #6
def finite_difference_gradients(X, fun, h=1e-6):
    """
    Compute finite difference gradients with a given interface.

    :param ndarray X: M-by-m matrix that contains the points to estimate the
        gradients with finite differences.
    :param function fun: Function that returns the simulation's quantity of
        interest given inputs.
    :param float h: The finite difference step size.

    :return: df, M-by-m matrix that contains estimated partial derivatives
        approximated by finite differences
    :rtype: ndarray
    """
    X, M, m = process_inputs(X)
    logging.getLogger(__name__).debug('Computing finite diff grads at {:d} points in {:d} dims.'.format(M, m))

    # points to run simulations including the perturbed inputs
    XX = np.kron(np.ones((m+1, 1)),X) + \
        h*np.kron(np.vstack((np.zeros((1, m)), np.eye(m))), np.ones((M, 1)))

    # run the simulation
    if isinstance(fun, SimulationRunner):
        F = fun.run(XX)
    else:
        F = SimulationRunner(fun).run(XX)

    df = (F[M:].reshape((m, M)).transpose() - F[:M]) / h
    return df.reshape((M,m))
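A self-contained check of the forward-difference construction above, with a plain Python callable in place of `SimulationRunner` (the quadratic test function is illustrative):

import numpy as np

def fd_gradients(X, fun, h=1e-6):
    M, m = X.shape
    # stack the base points and the m perturbed copies, exactly as above
    XX = np.kron(np.ones((m+1, 1)), X) + \
        h*np.kron(np.vstack((np.zeros((1, m)), np.eye(m))), np.ones((M, 1)))
    F = np.array([fun(x) for x in XX]).reshape((-1, 1))
    return ((F[M:].reshape((m, M)).T - F[:M]) / h).reshape((M, m))

rng = np.random.default_rng(3)
X = rng.standard_normal((5, 3))
df = fd_gradients(X, lambda x: np.sum(x**2))
print(np.allclose(df, 2*X, atol=1e-4))  # gradient of sum(x^2) is 2x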
Example #7
def finite_difference_gradients(X, fun, h=1e-6):
    """Compute finite difference gradients with a given interface.

    Parameters
    ----------
    X : ndarray 
        M-by-m matrix that contains the points to estimate the gradients with 
        finite differences
    fun : function
        function that returns the simulation's quantity of interest given inputs
    h : float, optional 
        the finite difference step size (default 1e-6)

    Returns
    -------
    df : ndarray 
        M-by-m matrix that contains estimated partial derivatives approximated 
        by finite differences
    """
    X, M, m = process_inputs(X)

    # points to run simulations including the perturbed inputs
    XX = np.kron(np.ones((m+1, 1)),X) + \
        h*np.kron(np.vstack((np.zeros((1, m)), np.eye(m))), np.ones((M, 1)))

    # run the simulation
    if isinstance(fun, SimulationRunner):
        F = fun.run(XX)
    else:
        F = SimulationRunner(fun).run(XX)

    df = (F[M:].reshape((m, M)).transpose() - F[:M]) / h
    return df.reshape((M,m))
Example #8
def normalized_active_subspace_x(X, df, weights):
    """Compute the active subspace-x matrix from row-normalized samples.

    Normalizes each row of `X` and `df` to unit length (rows with norm below
    SQRTEPS are zeroed out), then computes the sorted eigenvalues and
    eigenvectors of C = (A + A') / 2 with A = df' * diag(weights) * X.
    """
    df, M, m = process_inputs(df)
    
    # get row norms
    ndf = np.sqrt(np.sum(df*df, axis=1))
    nX = np.sqrt(np.sum(X*X, axis=1))
    
    # find rows with norm too close to zero and set elements to exactly zero
    ind = ndf < SQRTEPS
    df[ind,:], ndf[ind] = 0.0, 1.0
    
    ind = nX < SQRTEPS
    X[ind,:], nX[ind] = 0.0, 1.0
    
    # normalize rows
    df = df / ndf.reshape((M, 1))
    X = X / nX.reshape((M, 1))
    
    # compute the matrix
    A = np.dot(df.transpose(), X * weights)
    C = 0.5*(A + A.transpose())
    
    return sorted_eigh(C)           
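The zero-norm guard above avoids dividing by (near-)zero row norms: rows whose norm falls below `SQRTEPS` (presumably the square root of machine epsilon) are zeroed and their norms replaced by 1.0, which makes the subsequent division a no-op. A small illustration:

import numpy as np

SQRTEPS = np.sqrt(np.finfo(float).eps)

df = np.array([[3.0, 4.0], [1e-20, 0.0]])
ndf = np.sqrt(np.sum(df*df, axis=1))

ind = ndf < SQRTEPS
df[ind, :], ndf[ind] = 0.0, 1.0      # zero the row, neutralize its norm

df = df / ndf.reshape((-1, 1))
print(df)                            # [[0.6, 0.8], [0.0, 0.0]]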
Example #9
def local_linear_gradients(X, f, p=None, weights=None):
    """Estimate a collection of gradients from input/output pairs.
    
    Given a set of input/output pairs, choose subsets of neighboring points and
    build a local linear model for each subset. The gradients of these local
    linear models comprise estimates of sampled gradients.

    Parameters
    ----------
    X : ndarray 
        M-by-m matrix that contains the m-dimensional inputs
    f : ndarray 
        M-by-1 matrix that contains scalar outputs
    p : int, optional
        how many nearest neighbors to use when constructing the local linear
        model (default None, which uses floor(1.7*m) neighbors)
    weights : ndarray, optional
        M-by-1 matrix that contains the weights for each observation (default 
        None)

    Returns
    -------
    df : ndarray
        M-by-m matrix that contains estimated partial derivatives approximated 
        by the local linear models

    Notes
    -----
    If `p` is not specified, the default value is floor(1.7*m).
    """

    X, M, m = process_inputs(X)
    if M<=m: raise Exception('Not enough samples for local linear models.')

    if p is None:
        p = int(np.minimum(np.floor(1.7*m), M))
    elif not isinstance(p, int):
        raise TypeError('p must be an integer.')

    if p < m+1 or p > M:
        raise Exception('p must be between m+1 and M')
        
    if weights is None:
        weights = np.ones((M, 1)) / M

    MM = np.minimum(int(np.ceil(10*m*np.log(m))), M-1)
    df = np.zeros((MM, m))
    for i in range(MM):
        ii = np.random.randint(M)
        x = X[ii,:]
        D2 = np.sum((X - x)**2, axis=1)
        ind = np.argsort(D2)
        ind = ind[D2[ind] != 0]  # drop zero-distance points (the point itself)
        A = np.hstack((np.ones((p,1)), X[ind[:p],:])) * np.sqrt(weights[ii])
        b = f[ind[:p]] * np.sqrt(weights[ii])
        u = np.linalg.lstsq(A, b, rcond=None)[0]
        df[i,:] = u[1:].T
    return df
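On data from an exactly linear function, the local least-squares fit recovers the true gradient. Here is a compact, self-contained rehearsal of one iteration of the loop above (names and sizes are illustrative):

import numpy as np

rng = np.random.default_rng(4)
M, m, p = 200, 3, 6
a = np.array([1.0, -2.0, 0.5])
X = rng.standard_normal((M, m))
f = np.dot(X, a).reshape((M, 1))     # exactly linear outputs

ii = rng.integers(M)
D2 = np.sum((X - X[ii, :])**2, axis=1)
ind = np.argsort(D2)
ind = ind[D2[ind] != 0]              # drop the point itself

A = np.hstack((np.ones((p, 1)), X[ind[:p], :]))
b = f[ind[:p]]
u = np.linalg.lstsq(A, b, rcond=None)[0]
print(np.allclose(u[1:].flatten(), a))  # True: slope coefficients = gradient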
Example #10
def active_subspace(df, weights):
    """Compute the active subspace from gradient samples.

    Computes the sorted eigenvalues and eigenvectors of
    C = df' * diag(weights) * df, where the M-by-m matrix `df` holds the
    gradient samples as rows and `weights` is an M-by-1 vector of quadrature
    weights.
    """
    df, M, m = process_inputs(df)
        
    # compute the matrix
    C = np.dot(df.transpose(), df * weights)
    
    return sorted_eigh(C)
Example #11
    def predict(self, X, compgrad=False):
        """
        Compute the value of the response surface at given values of the
        simulation inputs.

        :param ndarray X: M-by-m matrix containing points in simulation's
            input space.
        :param bool compgrad: Determines if the gradient of the response surface is
            computed and returned. (Default is False)

        :return: f, matrix containing the response surface values at the
            given `X`.
        :rtype: ndarray

        :return: df, M-by-m matrix containing the estimated gradient at the
            given `X`. If `compgrad` is False, then `df` is None.
        :rtype: ndarray

        **See Also**

        response_surfaces.ActiveSubspaceResponseSurface

        **Notes**

        The default response surface is a radial basis function approximation
        using an exponential-squared (i.e., Gaussian) radial basis. The
        eigenvalues from the active subspace analysis are used to determine the
        characteristic length scales for each of the active variables. In other
        words the radial basis is anisotropic, and the anisotropy is determined
        by the eigenvalues.

        The response surface also has a quadratic monomial basis. The
        coefficients of the monomial basis are fit with weighted least-squares.

        In practice, this is equivalent to a kriging or Gaussian process
        approximation. However, such approximations bring several assumptions
        about noisy data that are difficult, if not impossible, to verify in
        computer simulations. I chose to avoid the difficulties that come with
        such methods. That means there is no so-called prediction variance
        associated with the response surface prediction. Personally, I think
        this is better. The prediction variance has no connection to the
        approximation error, except in very special cases. I prefer not to
        confuse the user with things that look and smell like error bars but
        aren't actually error bars.
        """
        if not isinstance(compgrad, bool):
            raise TypeError('compgrad should be a boolean')

        X, M, m = process_inputs(X)

        if m != self.m:
            raise Exception('The dimension of the points is {:d} but should \
                be {:d}.'.format(m, self.m))
        f, df = self.as_respsurf.predict(X, compgrad=compgrad)
        return f, df
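A toy sketch of the response surface described in the notes: an exponential-squared radial basis whose per-coordinate length scales come from the active subspace eigenvalues. The `ell = sqrt(evals)` scaling rule is an illustrative assumption, not necessarily the library's exact choice, and the quadratic monomial term is omitted for brevity:

import numpy as np

def rbf_predict(Ystar, Y, coef, ell):
    # anisotropic Gaussian kernel: larger eigenvalue -> longer length scale
    D = (Ystar[:, None, :] - Y[None, :, :]) / ell
    K = np.exp(-np.sum(D**2, axis=2))
    return np.dot(K, coef)

rng = np.random.default_rng(5)
n, M = 2, 30
evals = np.array([4.0, 0.25])
ell = np.sqrt(evals)                    # assumed eigenvalue-based scaling
Y = rng.standard_normal((M, n))
fvals = np.sin(Y[:, 0]).reshape((M, 1))

K = np.exp(-np.sum(((Y[:, None, :] - Y[None, :, :]) / ell)**2, axis=2))
coef = np.linalg.solve(K + 1e-10*np.eye(M), fvals)   # tiny jitter for stability
print(rbf_predict(Y[:3], Y, coef, ell).flatten())    # ~ fvals[:3]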
Example #12
def active_subspace_x(X, df, weights):
    """Compute the symmetrized input/gradient cross matrix.

    Computes the sorted eigenvalues and eigenvectors of C = (A + A') / 2
    with A = df' * diag(weights) * X, where `X` holds the input samples and
    `df` the gradient samples as rows, and `weights` is an M-by-1 vector of
    quadrature weights.
    """
    df, M, m = process_inputs(df)
    
    # compute the matrix
    A = np.dot(df.transpose(), X * weights)
    C = 0.5*(A + A.transpose())
    
    return sorted_eigh(C)
Example #13
def bootstrap_ranges(e, W, X, f, df, weights, ssmethod, nboot=100):
    """Compute bootstrap ranges for eigenvalues and subspaces.

    Resamples the input/output or gradient samples `nboot` times, recomputes
    the eigenpairs with `ssmethod`, and returns `e_br` (m-by-2 eigenvalue
    ranges), `sub_br` ((m-1)-by-3 min/mean/max of the subspace distance), and
    `li_F` (an m-by-1 vector with Bing Li's determinant-based ladle metric).
    """
    if df is not None:
        df, M, m = process_inputs(df)
    else:
        X, M, m = process_inputs(X)
    
    e_boot = np.zeros((m, nboot))
    sub_dist = np.zeros((m-1, nboot))
    sub_det = np.zeros((m-1, nboot))
    
    # TODO: should be able to parallelize this
    for i in range(nboot):
        X0, f0, df0, weights0 = bootstrap_replicate(X, f, df, weights)
        e0, W0 = ssmethod(X0, f0, df0, weights0)
        e_boot[:,i] = e0.reshape((m,))
        for j in range(m-1):
            sub_dist[j,i] = np.linalg.norm(np.dot(W[:,:j+1].T, W0[:,j+1:]), ord=2)
            sub_det[j,i] = np.linalg.det(np.dot(W[:,:j+1].T, W0[:,:j+1]))
    
    # bootstrap ranges for the eigenvalues
    e_br = np.hstack(( np.amin(e_boot, axis=1).reshape((m, 1)), \
                        np.amax(e_boot, axis=1).reshape((m, 1)) ))
    
    # bootstrap ranges and mean for subspace distance
    sub_br = np.hstack(( np.amin(sub_dist, axis=1).reshape((m-1, 1)), \
                        np.mean(sub_dist, axis=1).reshape((m-1, 1)), \
                        np.amax(sub_dist, axis=1).reshape((m-1, 1)) ))
    
    # metric from Li's ladle plot paper
    li_F = np.vstack(( np.zeros((1,1)), np.sum(1.0 - np.fabs(sub_det), axis=1).reshape((m-1, 1)) / nboot ))
    li_F = li_F / np.sum(li_F)

    return e_br, sub_br, li_F
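The quantity accumulated in `sub_dist` is the subspace distance ||W1' * W2_0||_2, the sine of the largest principal angle between the estimated active subspace and a bootstrap replicate. A quick sanity check of its extreme values:

import numpy as np

def subspace_distance(W1, W2_0):
    # spectral norm of W1' * W2_0, where W2_0 is the replicate's inactive block
    return np.linalg.norm(np.dot(W1.T, W2_0), ord=2)

I = np.eye(4)
W1 = I[:, :2]                            # estimated active subspace span{e1, e2}
print(subspace_distance(W1, I[:, 2:]))   # 0.0: replicate spans the same subspace
print(subspace_distance(W1, I[:, :2]))   # 1.0: replicate subspace is orthogonal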
Example #14
def local_linear_gradients(X, f, p=None, weights=None):
    """
    Estimate a collection of gradients from input/output pairs.

    :param ndarray X: M-by-m matrix that contains the m-dimensional inputs.
    :param ndarray f: M-by-1 matrix that contains scalar outputs.
    :param int p: How many nearest neighbors to use when constructing the
        local linear model.
    :param ndarray weights: M-by-1 matrix that contains the weights for
        each observation.

    :return df: M-by-m matrix that contains estimated partial derivatives
        approximated by the local linear models.
    :rtype: ndarray

    **Notes**

    If `p` is not specified, the default value is floor(1.7*m).
    """

    X, M, m = process_inputs(X)
    if M<=m: raise Exception('Not enough samples for local linear models.')

    if p is None:
        p = int(np.minimum(np.floor(1.7*m), M))
    elif not isinstance(p, int):
        raise TypeError('p must be an integer.')

    if p < m+1 or p > M:
        raise Exception('p must be between m+1 and M')
        
    if weights is None:
        weights = np.ones((M, 1)) / M

    MM = np.minimum(int(np.ceil(10*m*np.log(m))), M-1)
    logging.getLogger(__name__).debug('Computing {:d} local linear approximations with {:d} points in {:d} dims.'.format(MM, M, m))
    df = np.zeros((MM, m))
    for i in range(MM):
        ii = np.random.randint(M)
        x = X[ii,:]
        D2 = np.sum((X - x)**2, axis=1)
        ind = np.argsort(D2)
        ind = ind[D2[ind] != 0]  # drop zero-distance points (the point itself)
        A = np.hstack((np.ones((p,1)), X[ind[:p],:])) * np.sqrt(weights[ii])
        b = f[ind[:p]] * np.sqrt(weights[ii])
        u = np.linalg.lstsq(A, b, rcond=None)[0]
        df[i,:] = u[1:].T
    return df
Example #15
    def forward(self, X):
        """
        Map the points in the original input space to the active and inactive
        variables.

        :param ndarray X: An M-by-m matrix. Each row of `X` is a point in the
            original parameter space

        :return: Y, M-by-n matrix that contains points in the space of active
            variables. Each row of `Y` corresponds to a row of `X`.
        :rtype: ndarray

        :return: Z, M-by-(m-n) matrix that contains points in the space of
            inactive variables. Each row of `Z` corresponds to a row of `X`.
        :rtype: ndarray

        """
        X = process_inputs(X)[0]
        W1, W2 = self.domain.subspaces.W1, self.domain.subspaces.W2
        Y, Z = np.dot(X, W1), np.dot(X, W2)
        return Y, Z
Example #16
    def inverse(self, Y, N=1):
        """Find points in full space that map to active variable points.
        
        Map the points in the active variable space to the original parameter
        space.
        
        Parameters
        ----------
        Y : ndarray
            M-by-n matrix that contains points in the space of active variables
        N : int, optional
            the number of points in the original parameter space that are 
            returned that map to the given active variables (default 1)

        Returns
        -------
        X : ndarray
            (M*N)-by-m matrix that contains points in the original parameter 
            space
        ind : ndarray
            (M*N)-by-1 matrix that contains integer indices. These indices 
            identify which rows of `X` map to which rows of `Y`.

        Notes
        -----
        The inverse map depends critically on the `regularize_z` function.
        """
        # check inputs
        Y, NY, n = process_inputs(Y)

        if not isinstance(N, int):
            raise TypeError('N must be an int')

        Z = self.regularize_z(Y, N)
        W = self.domain.subspaces.eigenvecs
        X, ind = _rotate_x(Y, Z, W)
        return X, ind
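A sketch of the bookkeeping when N > 1, assuming each active-variable point is paired with N draws of the inactive variables before rotating back to the full space (`rotate_x_sketch` is a hypothetical stand-in for `_rotate_x`):

import numpy as np

def rotate_x_sketch(Y, Z, W, N):
    M, n = Y.shape
    Yrep = np.repeat(Y, N, axis=0)             # each y repeated N times
    X = np.dot(np.hstack((Yrep, Z)), W.T)      # back to the full space
    ind = np.repeat(np.arange(M), N).reshape((M*N, 1))
    return X, ind

rng = np.random.default_rng(6)
m, n, M, N = 4, 2, 3, 2
W, _ = np.linalg.qr(rng.standard_normal((m, m)))
Y = rng.standard_normal((M, n))
Z = rng.standard_normal((M*N, m-n))            # N inactive draws per y
X, ind = rotate_x_sketch(Y, Z, W, N)
print(X.shape, ind.ravel())                    # (6, 4) [0 0 1 1 2 2]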
Example #17
    def inverse(self, Y, N=1):
        """Find points in full space that map to active variable points.

        Map the points in the active variable space to the original parameter
        space.

        Parameters
        ----------
        Y : ndarray
            M-by-n matrix that contains points in the space of active variables
        N : int, optional
            the number of points in the original parameter space that are
            returned that map to the given active variables (default 1)

        Returns
        -------
        X : ndarray
            (M*N)-by-m matrix that contains points in the original parameter
            space
        ind : ndarray
            (M*N)-by-1 matrix that contains integer indices. These indices
            identify which rows of `X` map to which rows of `Y`.

        Notes
        -----
        The inverse map depends critically on the `regularize_z` function.
        """
        # check inputs
        Y, NY, n = process_inputs(Y)

        if not isinstance(N, Integral):
            raise TypeError('N must be an int')

        Z = self.regularize_z(Y, N)
        W = self.domain.subspaces.eigenvecs
        X, ind = _rotate_x(Y, Z, W)
        return X, ind
Example #18
def active_subspace(df, weights):
    """Compute the active subspace.

    Parameters
    ----------
    df : ndarray
        M-by-m matrix containing the gradient samples oriented as rows
    weights : ndarray
        M-by-1 weight vector, corresponds to numerical quadrature rule used to
        estimate matrix whose eigenspaces define the active subspace

    Returns
    -------
    e : ndarray
        m-by-1 vector of eigenvalues
    W : ndarray
        m-by-m orthogonal matrix of eigenvectors
    """
    df, M, m = process_inputs(df)

    # compute the matrix
    C = np.dot(df.transpose(), df * weights)

    return sorted_eigh(C)
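The broadcast `df * weights` applies the quadrature weights row-wise without forming a diagonal matrix. A quick check that this matches C = df' * diag(weights) * df:

import numpy as np

rng = np.random.default_rng(7)
M, m = 20, 3
df = rng.standard_normal((M, m))
weights = np.ones((M, 1)) / M                 # Monte Carlo weights

C_broadcast = np.dot(df.T, df * weights)
C_diag = df.T @ np.diag(weights.flatten()) @ df
print(np.allclose(C_broadcast, C_diag))       # True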
Example #20
def bootstrap_ranges(df, e, W, f=0, X=0, c_index=0, n_boot=200):
    """
    Use a nonparametric bootstrap to estimate variability in the computed
    eigenvalues and subspaces.

    :param ndarray df: M-by-m matrix of evaluations of the gradient.
    :param ndarray e: m-by-1 vector of eigenvalues.
    :param ndarray W: eigenvectors.
    :param ndarray f: M-by-1 vector of function evaluations.
    :param ndarray X: M-by-m array for c_index = 0, 1, 2, 3, or M-by-2m
        matrix for c_index = 4.
    :param int c_index: an integer specifying which C matrix to compute
        (default 0).
    :param int n_boot: number of bootstrap replicates to use when computing
        bootstrap ranges.

    :return: [e_br, sub_br]. `e_br` is an m-by-2 matrix that contains the
        bootstrap ranges for the eigenvalues. `sub_br` is an (m-1)-by-3 matrix
        that contains the bootstrap ranges (first and third columns) and the
        mean (second column) of the error in the estimated subspaces.
    :rtype: [ndarray, ndarray]

    **Notes**

    The mean of the subspace distance bootstrap replicates is an interesting
    quantity. Still trying to figure out what its precise relation is to
    the subspace error. They seem to behave similarly in test problems. And an
    initial "coverage" study suggested that they're unbiased for a quadratic
    test problem. Quadratics, though, may be special cases.
    """
    # number of gradient samples and dimension
    if c_index != 4:
        df, M, m = process_inputs(df)
    else:
        M = int(np.shape(X)[0])
        m = int((np.shape(X)[1]/2))
        
    # bootstrap
    e_boot = np.zeros((m, n_boot))
    sub_dist = np.zeros((m-1, n_boot))
    ind = np.random.randint(M, size=(M, n_boot))

    # can i parallelize this?
    for i in range(n_boot):
        if c_index == 0:
            e0, W0 = spectral_decomposition(df[ind[:,i],:])
        elif c_index == 1:
            e0, W0 = spectral_decomposition(df=df[ind[:,i],:],f=f,X=X[ind[:,i],:],c_index=c_index)
        elif c_index == 2:
            e0, W0 = spectral_decomposition(df[ind[:,i],:],c_index=c_index)
        elif c_index == 3:
            e0, W0 = spectral_decomposition(df[ind[:,i],:],f,X=X[ind[:,i],:],c_index=c_index)
        elif c_index == 4:
            f_x = f[ind[:,i]]
            f_y = f[ind[:,i]+M]
            # build a local copy so f is not clobbered between replicates
            f0 = np.append([[f_x]],[[f_y]],axis=0)
            f0 = f0.reshape(2*np.size(f_x))
            e0, W0 = spectral_decomposition(df=0,f=f0,X=X[ind[:,i],:],c_index=c_index)
        e_boot[:,i] = e0.reshape((m,))
        for j in range(m-1):
            sub_dist[j,i] = np.linalg.norm(np.dot(W[:,:j+1].T, W0[:,j+1:]), ord=2)

    e_br = np.zeros((m, 2))
    sub_br = np.zeros((m-1, 3))
    for i in range(m):
        e_br[i,0] = np.amin(e_boot[i,:])
        e_br[i,1] = np.amax(e_boot[i,:])
    for i in range(m-1):
        sub_br[i,0] = np.amin(sub_dist[i,:])
        sub_br[i,1] = np.mean(sub_dist[i,:])
        sub_br[i,2] = np.amax(sub_dist[i,:])

    return e_br, sub_br
Example #21
    def build_from_data(self, X, f, df=None, avdim=None):
        """
        Build the active subspace-enabled model with input/output pairs.

        :param ndarray X: M-by-m matrix with evaluations of the m-dimensional
            simulation inputs.
        :param ndarray f: M-by-1 matrix with corresponding simulation quantities
            of interest.
        :param ndarray df: M-by-m matrix that contains the gradients of the
            simulation quantity of interest, oriented row-wise, that correspond
            to the rows of `X`. If `df` is not present, then it is estimated
            with crude local linear models using the pairs `X` and `f`.
        :param int avdim: The dimension of the active subspace. If `avdim`
            is not present, a crude heuristic is used to choose an active
            subspace dimension based on the given data `X` and
            `f`, and possibly `df`.

        **Notes**

        This method follows these steps:

        #. If `df` is None, estimate it with local linear models using the \
        input/output pairs `X` and `f`.
        #. Compute the active and inactive subspaces using `df`.
        #. Train a response surface using `X` and `f` that exploits the active \
        subspace.

        """
        X, f, M, m = process_inputs_outputs(X, f)

        # check if the given inputs satisfy the assumptions
        if self.bounded_inputs:
            if np.any(X > 1.0) or np.any(X < -1.0):
                raise Exception('The supposedly bounded inputs exceed the \
                    bounds [-1,1].')
        else:
            if np.any(X > 10.0) or np.any(X < -10.0):
                raise Exception('There is a very good chance that your \
                    unbounded inputs are not properly scaled.')
        self.X, self.f, self.m = X, f, m

        if df is not None:
            df, M_df, m_df = process_inputs(df)
            if m_df != m:
                raise ValueError('The dimension of the gradients should be \
                                the same as the dimension of the inputs.')
        else:
            # if gradients aren't available, estimate them from data
            df = local_linear_gradients(X, f)


        # compute the active subspace
        ss = Subspaces()
        ss.compute(df)
        if avdim is not None:
            if not isinstance(avdim, int):
                raise TypeError('avdim should be an integer.')
            else:
                ss.partition(avdim)
        self.n = ss.W1.shape[1]
        print('The dimension of the active subspace is {:d}.'.format(self.n))

        # set up the active variable domain and map
        if self.bounded_inputs:
            avdom = BoundedActiveVariableDomain(ss)
            avmap = BoundedActiveVariableMap(avdom)
        else:
            avdom = UnboundedActiveVariableDomain(ss)
            avmap = UnboundedActiveVariableMap(avdom)

        # build the response surface
        asrs = ActiveSubspaceResponseSurface(avmap)
        asrs.train_with_data(X, f)

        # set the R-squared coefficient
        self.Rsqr = asrs.respsurf.Rsqr
        self.as_respsurf = asrs
Example #23
 def compute(self, X=None, f=None, df=None, weights=None, sstype='AS', ptype='EVG', nboot=0):
     """Compute the active and inactive subspaces.
     
     Given input points and corresponding outputs, or given samples of the 
     gradients, estimate an active subspace. This method has four different
     algorithms for estimating the active subspace: 'AS' is the standard
     active subspace that requires gradients, 'OLS' uses a global linear
     model to estimate a one-dimensional active subspace, 'QPHD' uses a 
     global quadratic model to estimate subspaces, and 'OPG' uses a set of
     local linear models computed from subsets of given input/output pairs.

     The function also sets the dimension of the active subspace (and,
     consequently, the dimension of the inactive subspace). There are three
     heuristic choices for the dimension of the active subspace. The default
     is the largest gap in the eigenvalue spectrum, which is 'EVG'. The other
     two choices are 'RS', which estimates the error in a low-dimensional 
     response surface using the eigenvalues and the estimated subspace 
     errors, and 'LI' which is a heuristic from Bing Li on order 
     determination. 
     
     Note that either `df` or `X` and `f` must be given, although formally
     all are optional.
     
     Parameters
     ----------
     X : ndarray, optional
          M-by-m matrix of samples of input points, arranged as rows (default
         None)
     f : ndarray, optional
         M-by-1 matrix of outputs corresponding to rows of `X` (default None)
     df : ndarray, optional
         M-by-m matrix of samples of gradients, arranged as rows (default
         None)
     weights : ndarray, optional
        M-by-1 matrix of weights associated with rows of `X`
     sstype : str, optional
        defines subspace type to compute. Default is 'AS' for active 
         subspace, which requires `df`. Other options are `OLS` for a global
        linear model, `QPHD` for a global quadratic model, and `OPG` for 
        local linear models. The latter three require `X` and `f`.
     ptype : str, optional
         defines the partition type. Default is 'EVG' for largest 
         eigenvalue gap. Other options are 'RS', which is an estimate of the
         response surface error, and 'LI', which is a heuristic proposed by
         Bing Li based on subspace errors and eigenvalue decay.
     nboot : int, optional
         number of bootstrap samples used to estimate the error in the 
         estimated subspace (default 0 means no bootstrap estimates)
         
     Notes
     -----
     Partition type 'RS' and 'LI' require nboot to be greater than 0 (and
     probably something more like 100) to get bootstrap estimates of the 
     subspace error. 
     """
     
     # Check inputs
     if X is not None:
         X, M, m = process_inputs(X)
     elif df is not None:
         df, M, m = process_inputs(df)
     else:
         raise Exception('One of input/output pairs (X,f) or gradients (df) must not be None')
         
     if weights is None:
         # default weights is for Monte Carlo
         weights = np.ones((M, 1)) / M
     
     # Compute the subspace
     if sstype == 'AS':
         if df is None:
             raise Exception('df is None')
         e, W = active_subspace(df, weights)
         ssmethod = lambda X, f, df, weights: active_subspace(df, weights)
     elif sstype == 'OLS':
         if X is None or f is None:
             raise Exception('X or f is None')
         e, W = ols_subspace(X, f, weights)
         ssmethod = lambda X, f, df, weights: ols_subspace(X, f, weights)
     elif sstype == 'QPHD':
         if X is None or f is None:
             raise Exception('X or f is None')
         e, W = qphd_subspace(X, f, weights)
         ssmethod = lambda X, f, df, weights: qphd_subspace(X, f, weights)
     elif sstype == 'OPG':
         if X is None or f is None:
             raise Exception('X or f is None')
         e, W = opg_subspace(X, f, weights)
         ssmethod = lambda X, f, df, weights: opg_subspace(X, f, weights)
     else:
         e, W = None, None
         ssmethod = None
         raise Exception('Unrecognized subspace type: {}'.format(sstype))
     
     self.eigenvals, self.eigenvecs = e, W    
     
     # Compute bootstrap ranges and partition
     if nboot > 0:
         e_br, sub_br, li_F = _bootstrap_ranges(e, W, X, f, df, weights, ssmethod, nboot)
     else:
          if ptype == 'RS' or ptype == 'LI':
             raise Exception('Need to run bootstrap for partition type {}'.format(ptype))
         
         e_br, sub_br = None, None
         
     self.e_br, self.sub_br = e_br, sub_br
     
     # Compute the partition
     if ptype == 'EVG':
         n = eig_partition(e)[0]
     elif ptype == 'RS':
         sub_err = sub_br[:,1].reshape((m-1, 1))
         n = errbnd_partition(e, sub_err)[0]
     elif ptype == 'LI':
         n = ladle_partition(e, li_F)[0]
     else:
         raise Exception('Unrecognized partition type: {}'.format(ptype))
     
     self.partition(n)
Example #24
def _bootstrap_ranges(e, W, X, f, df, weights, ssmethod, nboot=100):
    """Compute bootstrap ranges for eigenvalues and subspaces.

    An implementation of the nonparametric bootstrap that we use in
    conjunction with the subspace estimation methods to estimate the errors in
    the eigenvalues and subspaces.

    Parameters
    ----------
    e : ndarray
        m-by-1 vector of eigenvalues
    W : ndarray
        m-by-m orthogonal matrix of eigenvectors
    X : ndarray
        M-by-m matrix of input samples, oriented as rows
    f : ndarray
        M-by-1 vector of outputs corresponding to rows of `X`
    df : ndarray
        M-by-m matrix of gradient samples
    weights : ndarray
        M-by-1 vector of weights corresponding to samples
    ssmethod : function
        a function that returns eigenpairs given input/output or gradient
        samples
    nboot : int, optional
        number of bootstrap samples (default 100)

    Returns
    -------
    e_br : ndarray
        m-by-2 matrix, first column contains bootstrap lower bound on
        eigenvalues, second column contains bootstrap upper bound on
        eigenvalues
    sub_br : ndarray
        (m-1)-by-3 matrix, first column contains bootstrap lower bound on
        estimated subspace error, second column contains estimated mean of
        subspace error (a reasonable subspace error estimate), third column
        contains estimated upper bound on subspace error
    li_F : ndarray
        m-by-1 vector of Bing Li's determinant-based metric for order
        determination

    """
    if df is not None:
        df, M, m = process_inputs(df)
    else:
        X, M, m = process_inputs(X)

    e_boot = np.zeros((m, nboot))
    sub_dist = np.zeros((m-1, nboot))
    sub_det = np.zeros((m-1, nboot))

    # TODO: should be able to parallelize this
    for i in range(nboot):
        X0, f0, df0, weights0 = _bootstrap_replicate(X, f, df, weights)
        e0, W0 = ssmethod(X0, f0, df0, weights0)
        e_boot[:,i] = e0.reshape((m,))
        for j in range(m-1):
            sub_dist[j,i] = np.linalg.norm(np.dot(W[:,:j+1].T, W0[:,j+1:]), ord=2)
            sub_det[j,i] = np.linalg.det(np.dot(W[:,:j+1].T, W0[:,:j+1]))

    # bootstrap ranges for the eigenvalues
    e_br = np.hstack(( np.amin(e_boot, axis=1).reshape((m, 1)), \
                        np.amax(e_boot, axis=1).reshape((m, 1)) ))

    # bootstrap ranges and mean for subspace distance
    sub_br = np.hstack(( np.amin(sub_dist, axis=1).reshape((m-1, 1)), \
                        np.mean(sub_dist, axis=1).reshape((m-1, 1)), \
                        np.amax(sub_dist, axis=1).reshape((m-1, 1)) ))

    # metric from Li's ladle plot paper
    li_F = np.vstack(( np.zeros((1,1)), np.sum(1.0 - np.fabs(sub_det), axis=1).reshape((m-1, 1)) / nboot ))
    li_F = li_F / np.sum(li_F)

    return e_br, sub_br, li_F
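A stripped-down rehearsal of the eigenvalue half of this bootstrap: resample the gradient rows with replacement, recompute the eigenvalues, and keep the per-index min and max (the C construction mirrors the `active_subspace` example above):

import numpy as np

rng = np.random.default_rng(8)
M, m, nboot = 100, 3, 50
df = rng.standard_normal((M, m)) * np.array([3.0, 1.0, 0.1])
weights = np.ones((M, 1)) / M

def eigvals_desc(df, w):
    C = np.dot(df.T, df * w)
    return np.sort(np.linalg.eigvalsh(C))[::-1]

e_boot = np.zeros((m, nboot))
for i in range(nboot):
    ind = rng.integers(M, size=M)             # resample rows with replacement
    e_boot[:, i] = eigvals_desc(df[ind, :], weights)

e_br = np.hstack((np.amin(e_boot, axis=1).reshape((m, 1)),
                  np.amax(e_boot, axis=1).reshape((m, 1))))
print(e_br)                                   # min/max range per eigenvalue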
Example #25
    def compute(self, df, f=0, X=0, function=0, c_index=0, comp_flag=0, N=5, n_boot=200):
        """
        Compute the active and inactive subspaces from a collection of
        sampled gradients.

        :param ndarray df: an ndarray of size M-by-m that contains evaluations of the gradient.
        :param ndarray f: an ndarray of size M that contains evaluations of the function.
        :param ndarray X: an ndarray of size M-by-m that contains data points in the input space.
        :param function: a specified function that outputs f(x), and df(x) the gradient vector for a data point x
        :param int c_index: an integer specifying which C matrix to compute, the default matrix is 0.
        :param int comp_flag: an integer specifying computation method: 0 for monte carlo, 1 for LG quadrature.
        :param int N: number of quadrature points per dimension.
        :param int n_boot: number of bootstrap replicates to use when computing bootstrap ranges.

        **Notes**

        This method sets the class's attributes `W1`, `W2`, `eigenvalues`, and
        `eigenvectors`. If `n_boot` is greater than zero, then this method
        also runs a bootstrap to compute and set `e_br` and `sub_br`.
        """
        
        if c_index != 4:
            df, M, m = process_inputs(df)
        else:
            M = np.shape(X)[0]
            m = int(np.shape(X)[1]/2)
        if not isinstance(n_boot, int):
            raise TypeError('n_boot must be an integer.')
        evecs = np.zeros((m,m))
        evals = np.zeros(m)
        e_br = np.zeros((m,2))
        sub_br = np.zeros((m-1,3))
        # compute eigenvalues and eigenvecs
        if c_index == 0:
            logging.getLogger('PAUL').info('Computing spectral decomp with {:d} samples in {:d} dims.'.format(M, m))
            evals, evecs = spectral_decomposition(df=df)
            if comp_flag == 0:
                # compute bootstrap ranges for eigenvalues and subspace distances
                if n_boot > 0:
                    logging.getLogger('PAUL').info('Bootstrapping {:d} spectral decomps of size {:d} by {:d}.'.format(n_boot, M, m))
                    e_br, sub_br = bootstrap_ranges(df, evals, evecs, n_boot=n_boot)
        elif c_index == 1:  
            if comp_flag == 0:
                evals, evecs = spectral_decomposition(df,f,X,c_index=c_index,comp_flag=comp_flag)
                # compute bootstrap ranges for eigenvalues and subspace distances
                if n_boot > 0:
                    logging.getLogger('PAUL').info('Bootstrapping {:d} spectral decomps of size {:d} by {:d}.'.format(n_boot, M, m))
                    e_br, sub_br = bootstrap_ranges(df, evals, evecs,f, X, c_index,n_boot)
            elif comp_flag == 1:
                evals, evecs = spectral_decomposition(df,f,X,function,c_index,N,comp_flag)        
        elif c_index == 2:
            if comp_flag == 0:
                evals, evecs = spectral_decomposition(df,f,X,c_index=c_index,comp_flag=comp_flag)
                # compute bootstrap ranges for eigenvalues and subspace distances
                if n_boot > 0:
                    logging.getLogger('PAUL').info('Bootstrapping {:d} spectral decomps of size {:d} by {:d}.'.format(n_boot, M, m))
                    e_br, sub_br = bootstrap_ranges(df, evals, evecs,f, X, c_index,n_boot)
            elif comp_flag == 1:
                evals, evecs = spectral_decomposition(df,f,X,function,c_index,N,comp_flag)
                
        elif c_index == 3:
            if comp_flag == 0:
                evals, evecs = spectral_decomposition(df,f,X,c_index=c_index,comp_flag=comp_flag)
                # compute bootstrap ranges for eigenvalues and subspace distances
                if n_boot > 0:
                    logging.getLogger('PAUL').info('Bootstrapping {:d} spectral decomps of size {:d} by {:d}.'.format(n_boot, M, m))
                    e_br, sub_br = bootstrap_ranges(df, evals, evecs,f, X, c_index,n_boot)
            elif comp_flag == 1:
                evals, evecs = spectral_decomposition(df,f,X,function,c_index,N,comp_flag)
        elif c_index == 4:
            if comp_flag == 0:
                evals, evecs = spectral_decomposition(df,f,X,c_index=c_index,comp_flag=comp_flag)
                # compute bootstrap ranges for eigenvalues and subspace distances
                if n_boot > 0:
                   # logging.getLogger('PAUL').info('Bootstrapping {:d} spectral decomps of size {:d} by {:d}.'.format(n_boot, M, 2*m))
                    e_br, sub_br = bootstrap_ranges(df,evals, evecs,f, X, c_index,n_boot)
            elif comp_flag == 1:
                evals, evecs = spectral_decomposition(df,f,X,function,c_index,N,comp_flag)   
        self.e_br, self.sub_br = e_br, sub_br
        self.eigenvalues, self.eigenvectors = evals, evecs

        # partition the subspaces with a crappy heuristic
        n = compute_partition(evals)
        self.partition(n)
Example #26
 def compute(self, X=None, f=None, df=None, weights=None, sstype=0, ptype=0, nboot=0):
     """
     TODO: docs
     
     Subspace types (sstype):
         0, active subspace
         1, normalized active subspace
         2, active subspace x
         3, normalized active subspace x
         4, swarm subspace
         5, ols sdr
         6, qphd sdr
         7, sir sdr
         8, phd sdr
         9, save sdr
         10, mave sdr
         11, opg sdr
         
     Partition types (ptype):
         0, eigenvalue gaps
         1, response surface error bound
         2, Li's ladle plot
     """
     
     # Check inputs
     if X is not None:
         X, M, m = process_inputs(X)
     elif df is not None:
         df, M, m = process_inputs(df)
     else:
         raise Exception('One of input/output pairs (X,f) or gradients (df) must not be None')
         
     if weights is None:
         # default weights is for Monte Carlo
         weights = np.ones((M, 1)) / M
     
     # Compute the subspace
     if sstype == 0:
         if df is None:
             raise Exception('df is None')
         e, W = active_subspace(df, weights)
         ssmethod = lambda X, f, df, weights: active_subspace(df, weights)
     elif sstype == 1:
         if df is None:
             raise Exception('df is None')
         e, W = normalized_active_subspace(df, weights)
         ssmethod = lambda X, f, df, weights: normalized_active_subspace(df, weights)
     elif sstype == 2:
         if X is None or df is None:
             raise Exception('X or df is None')
         e, W = active_subspace_x(X, df, weights)
         ssmethod = lambda X, f, df, weights: active_subspace_x(X, df, weights)
     elif sstype == 3:
         if X is None or df is None:
             raise Exception('X or df is None')            
         e, W = normalized_active_subspace_x(X, df, weights)
         ssmethod = lambda X, f, df, weights: normalized_active_subspace_x(X, df, weights)
     elif sstype == 4:
         if X is None or f is None:
             raise Exception('X or f is None')
         e, W = swarm_subspace(X, f, weights)
         ssmethod = lambda X, f, df, weights: swarm_subspace(X, f, weights)
     elif sstype == 5:
         if X is None or f is None:
             raise Exception('X or f is None')
         e, W = ols_subspace(X, f, weights)
         ssmethod = lambda X, f, df, weights: ols_subspace(X, f, weights)
     elif sstype == 6:
         if X is None or f is None:
             raise Exception('X or f is None')
         e, W = qphd_subspace(X, f, weights)
         ssmethod = lambda X, f, df, weights: qphd_subspace(X, f, weights)
     elif sstype == 7:
         if X is None or f is None:
             raise Exception('X or f is None')
         e, W = sir_subspace(X, f, weights)
         ssmethod = lambda X, f, df, weights: sir_subspace(X, f, weights)
     elif sstype == 8:
         if X is None or f is None:
             raise Exception('X or f is None')
         e, W = phd_subspace(X, f, weights)
         ssmethod = lambda X, f, df, weights: phd_subspace(X, f, weights)
     elif sstype == 9:
         if X is None or f is None:
             raise Exception('X or f is None')
         e, W = save_subspace(X, f, weights)
         ssmethod = lambda X, f, df, weights: save_subspace(X, f, weights)
     elif sstype == 10:
         if X is None or f is None:
             raise Exception('X or f is None')
         e, W = mave_subspace(X, f, weights)
         ssmethod = lambda X, f, df, weights: mave_subspace(X, f, weights)
     elif sstype == 11:
         if X is None or f is None:
             raise Exception('X or f is None')
         e, W = opg_subspace(X, f, weights)
         ssmethod = lambda X, f, df, weights: opg_subspace(X, f, weights)
     else:
         e, W = None, None
         ssmethod = None
         raise Exception('Unrecognized subspace type: {:d}'.format(sstype))
     
     self.eigenvalues, self.eigenvectors = e, W    
     
     # Compute bootstrap ranges and partition
     if nboot > 0:
         e_br, sub_br, li_F = bootstrap_ranges(e, W, X, f, df, weights, ssmethod, nboot)
     else:
         if ptype == 1 or ptype == 2:
             raise Exception('Need to run bootstrap for partition type {:d}'.format(ptype))
         
         e_br, sub_br = None, None
         
     self.e_br, self.sub_br = e_br, sub_br
     
     # Compute the partition
     if ptype == 0:
         n = eig_partition(e)[0]
     elif ptype == 1:
         sub_err = sub_br[:,1].reshape((m-1, 1))
         n = errbnd_partition(e, sub_err)[0]
     elif ptype == 2:
         n = ladle_partition(e, li_F)[0]
     else:
         raise Exception('Unrecognized partition type: {:d}'.format(ptype))
     
     self.partition(n)
Example #27
    def compute(self, X=None, f=None, df=None, weights=None, sstype='AS', ptype='EVG', nboot=0):
        """Compute the active and inactive subspaces.

        Given input points and corresponding outputs, or given samples of the
        gradients, estimate an active subspace. This method has four different
        algorithms for estimating the active subspace: 'AS' is the standard
        active subspace that requires gradients, 'OLS' uses a global linear
        model to estimate a one-dimensional active subspace, 'QPHD' uses a
        global quadratic model to estimate subspaces, and 'OPG' uses a set of
        local linear models computed from subsets of given input/output pairs.

        The function also sets the dimension of the active subspace (and,
        consequently, the dimension of the inactive subspace). There are three
        heuristic choices for the dimension of the active subspace. The default
        is the largest gap in the eigenvalue spectrum, which is 'EVG'. The other
        two choices are 'RS', which estimates the error in a low-dimensional
        response surface using the eigenvalues and the estimated subspace
        errors, and 'LI' which is a heuristic from Bing Li on order
        determination.

        Note that either `df` or `X` and `f` must be given, although formally
        all are optional.

        Parameters
        ----------
        X : ndarray, optional
            M-by-m matrix of samples of input points, arranged as rows (default
            None)
        f : ndarray, optional
            M-by-1 matrix of outputs corresponding to rows of `X` (default None)
        df : ndarray, optional
            M-by-m matrix of samples of gradients, arranged as rows (default
            None)
        weights : ndarray, optional
           M-by-1 matrix of weights associated with rows of `X`
        sstype : str, optional
           defines subspace type to compute. Default is 'AS' for active
           subspace, which requires `df`. Other options are `OLS` for a global
           linear model, `QPHD` for a global quadratic model, and `OPG` for
           local linear models. The latter three require `X` and `f`.
        ptype : str, optional
            defines the partition type. Default is 'EVG' for largest
            eigenvalue gap. Other options are 'RS', which is an estimate of the
            response surface error, and 'LI', which is a heuristic proposed by
            Bing Li based on subspace errors and eigenvalue decay.
        nboot : int, optional
            number of bootstrap samples used to estimate the error in the
            estimated subspace (default 0 means no bootstrap estimates)

        Notes
        -----
        Partition type 'RS' and 'LI' require nboot to be greater than 0 (and
        probably something more like 100) to get bootstrap estimates of the
        subspace error.
        """

        # Check inputs
        if X is not None:
            X, M, m = process_inputs(X)
        elif df is not None:
            df, M, m = process_inputs(df)
        else:
            raise Exception('One of input/output pairs (X,f) or gradients (df) must not be None')

        if weights is None:
            # default weights is for Monte Carlo
            weights = np.ones((M, 1)) / M

        # Compute the subspace
        if sstype == 'AS':
            if df is None:
                raise Exception('df is None')
            e, W = active_subspace(df, weights)
            ssmethod = lambda X, f, df, weights: active_subspace(df, weights)
        elif sstype == 'OLS':
            if X is None or f is None:
                raise Exception('X or f is None')
            e, W = ols_subspace(X, f, weights)
            ssmethod = lambda X, f, df, weights: ols_subspace(X, f, weights)
        elif sstype == 'QPHD':
            if X is None or f is None:
                raise Exception('X or f is None')
            e, W = qphd_subspace(X, f, weights)
            ssmethod = lambda X, f, df, weights: qphd_subspace(X, f, weights)
        elif sstype == 'OPG':
            if X is None or f is None:
                raise Exception('X or f is None')
            e, W = opg_subspace(X, f, weights)
            ssmethod = lambda X, f, df, weights: opg_subspace(X, f, weights)
        else:
            e, W = None, None
            ssmethod = None
            raise Exception('Unrecognized subspace type: {}'.format(sstype))

        self.eigenvals, self.eigenvecs = e, W

        # Compute bootstrap ranges and partition
        if nboot > 0:
            e_br, sub_br, li_F = _bootstrap_ranges(e, W, X, f, df, weights, ssmethod, nboot)
        else:
            if ptype == 'RS' or ptype == 'LI':
                raise Exception('Need to run bootstrap for partition type {}'.format(ptype))

            e_br, sub_br = None, None

        self.e_br, self.sub_br = e_br, sub_br

        # Compute the partition
        if ptype == 'EVG':
            n = eig_partition(e)[0]
        elif ptype == 'RS':
            sub_err = sub_br[:,1].reshape((m-1, 1))
            n = errbnd_partition(e, sub_err)[0]
        elif ptype == 'LI':
            n = ladle_partition(e, li_F)[0]
        else:
            raise Exception('Unrecognized partition type: {}'.format(ptype))

        self.partition(n)
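The `lambda X, f, df, weights: ...` wrappers give every estimator the same four-argument signature, so the bootstrap can call `ssmethod(X0, f0, df0, weights0)` without knowing which arguments the chosen method actually consumes. A minimal sketch of the pattern with stand-in estimators:

import numpy as np

def gradient_based(df, weights):              # stand-in for active_subspace
    C = np.dot(df.T, df * weights)
    return np.linalg.eigh(C)

def data_based(X, f, weights):                # stand-in for ols/qphd/opg
    u = np.linalg.lstsq(X, f, rcond=None)[0]
    return u, None

# both now look identical to the caller
ssmethod_grad = lambda X, f, df, weights: gradient_based(df, weights)
ssmethod_data = lambda X, f, df, weights: data_based(X, f, weights)

rng = np.random.default_rng(9)
X, f = rng.standard_normal((10, 2)), rng.standard_normal((10, 1))
df, w = rng.standard_normal((10, 2)), np.ones((10, 1)) / 10
for ssmethod in (ssmethod_grad, ssmethod_data):
    out = ssmethod(X, f, df, w)               # uniform call site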
Example #28
def spectral_decomposition(df, f=0, X=0, function=0, c_index=0, N=5, comp_flag=0):
    """
    Use the SVD to compute the eigenvectors and eigenvalues for the
    active subspace analysis.

    :param ndarray df: an ndarray of size M-by-m that contains evaluations of the gradient.
    :param ndarray f: an ndarray of size M that contains evaluations of the function.
    :param ndarray X: an ndarray of size M-by-m that contains data points in the input space.
    :param function: a specified function that outputs f(x), and df(x) the gradient vector for a data point x
    :param int c_index: an integer specifying which C matrix to compute, the default matrix is 0.
    :param int comp_flag: an integer specifying computation method: 0 for monte carlo, 1 for LG quadrature.
    :param int N: number of quadrature points per dimension.
    
    :return: [e, W], [ eigenvalues, eigenvectors ]
    :rtype: [ndarray, ndarray]

    **Notes**

    If the number M of gradient samples is less than the dimension m of the
    inputs, then the method builds an arbitrary basis for the nullspace, which
    corresponds to the inactive subspace.
    """
    # set integers
    if c_index != 4:
        df, M, m = process_inputs(df)
    else:
        M = int(np.shape(X)[0])
        m = int(np.shape(X)[1]/2)
    W = np.zeros((m,m))
    e = np.zeros((m,1))
    C = np.zeros((m,m))
    norm_tol = np.finfo(float).eps**(0.5)
    # compute active subspace
    if c_index == 0 and comp_flag == 0:
        # df.T * df is m-by-m, so its SVD always returns m singular values;
        # no zero padding is needed even when M < m
        U, sig, W = np.linalg.svd(np.dot(df.T, df), full_matrices=True)
        e = (sig**2) / M
    elif c_index == 0 and comp_flag == 1:
        xx = (np.ones(m)*N).astype(np.int64).tolist()  
        x,w = quad.gauss_legendre(xx)
        C = np.zeros((m,m))
        N = np.size(w)
        for i in range(0,N):
            [f,DF] = function(x[i,:])
            DF = DF.reshape((m,1))
            C = C + (np.dot(DF,DF.T))*w[i]
        U, sig, W = np.linalg.svd(C, full_matrices=True)
        e = (sig**2)
    elif c_index == 1 and comp_flag == 0:
        C =  (np.dot(X.T,df) + np.dot(df.T,X))/M
        U, sig, W = np.linalg.svd(C, full_matrices=True)
        e = (sig**2)
    elif c_index == 1 and comp_flag == 1:
        xx = (np.ones(m)*N).astype(np.int64).tolist()  
        x,w = quad.gauss_legendre(xx)
        C = np.zeros((m,m))
        N = np.size(w)
        for i in range(0,N):
            [f,DF] = function(x[i,:])
            xxx = x[i,:].reshape((m,1))
            DF = DF.reshape((m,1))
            C = C + (np.dot(xxx,DF.T) + np.dot(DF,xxx.T))*w[i]
        U, sig, W = np.linalg.svd(C, full_matrices=True)
        e = (sig**2)
    elif c_index == 2 and comp_flag == 0:
        row_count = np.shape(df)[0]
        i=0
        while (i< row_count):
            norm = np.linalg.norm(df[i,:])
            if( norm < norm_tol):
                df = np.delete(df,(i),axis=0)
                row_count = row_count-1
            else:
                df[i,:] = df[i,:]/norm
                i = i+1
        C =  np.dot(df.T,df)/M
        U, sig, W = np.linalg.svd(C, full_matrices=True)
        e = (sig**2)
    elif c_index == 2 and comp_flag == 1:
        xx = (np.ones(m)*N).astype(np.int64).tolist()  
        x,w = quad.gauss_legendre(xx)
        C = np.zeros((m,m))
        N = np.size(w)
        for i in range(0,N):
            [f,DF] = function(x[i,:])
            DF = DF.reshape((1,m))
            norm = np.linalg.norm(DF)
            if(norm > norm_tol):
                DF = DF/np.linalg.norm(DF)
                C = C + np.dot(DF.T,DF)*w[i]
        U, sig, W = np.linalg.svd(C, full_matrices=True)
        e = (sig**2)
    elif c_index == 3 and comp_flag == 0: 
        for i in range(0,M):
            xxx = X[i,:]
            DF = df[i,:]
            if(np.linalg.norm(xxx) < norm_tol):
                xxx = np.zeros(m)
            else:
                xxx = xxx/np.linalg.norm(xxx)
            if(np.linalg.norm(DF) < norm_tol):
                DF = np.zeros(m)
            else: 
                DF = DF/np.linalg.norm(DF)
            df[i,:] = DF
            X[i,:] = xxx
        C = C + (np.dot(df.T,X) + np.dot(X.T,df))/M
        U, sig, W = np.linalg.svd(C, full_matrices=True)
        e = (sig**2)
    elif c_index == 3 and comp_flag == 1:
        xx = (np.ones(m)*N).astype(np.int64).tolist()  
        x,w = quad.gauss_legendre(xx)
        C = np.zeros((m,m))
        N = np.size(w)
        for i in range(0,N):
            [f,DF] = function(x[i,:])
            xxx = x[i,:].reshape((m,1))
            if(np.linalg.norm(xxx) < norm_tol):
                xxx = np.zeros((m,1))
            else:
                xxx = xxx/np.linalg.norm(xxx)
            DF = DF.reshape((m,1))
            if(np.linalg.norm(DF) < norm_tol):
                DF = np.zeros(m)
            else:
               # print('shape xxx= ',np.shape(xxx),'shape DF = ', np.shape(DF))
                DF = DF/np.linalg.norm(DF)
            C = C + (np.dot(xxx,DF.T) + np.dot(DF,xxx.T))*w[i]
        U, sig, W = np.linalg.svd(C, full_matrices=True)
        e = (sig**2)
    elif c_index == 4 and comp_flag == 0:
        x = X[:,:m]
        y = X[:,m:]
        A = (f[:M]-f[M:])**2
        for i in range(0,M):
            vec = (x[i,:]-y[i,:]).reshape((m,1))
            if np.linalg.norm(vec) > norm_tol:
                vec =  (vec)/np.linalg.norm(vec)
                C = C + A[i]*np.dot(vec,vec.T)/M
        U, sig, W = np.linalg.svd(C, full_matrices=True)
        e = (sig**2)
    elif c_index == 4 and comp_flag == 1:
        xx = (np.ones(2*m)*N).astype(np.int64).tolist()  
        x,w = quad.gauss_legendre(xx)
        N = np.size(w)
        for i in range(0,N):
            norm = np.linalg.norm(x[i,:m]-x[i,m:]) 
            if( norm > norm_tol):
                xxx = x[i,:m].reshape((m,1))
                yyy = x[i,m:].reshape((m,1))
                [f_x,DF] = function(xxx)
                [f_y,DF] = function(yyy)
                C = C + w[i]*np.dot((xxx-yyy),(xxx-yyy).T)*(f_x-f_y)**2/norm**2
        [evals,WW] = np.linalg.eig(C)
        order = np.argsort(evals)
        order = np.flipud(order)
        e = evals[order]
        W = np.zeros((m,m))
        for jj in range(0,m):
            W[:,jj] = WW[:,order[jj]]
    W = W.T
    W = W*np.sign(W[0,:])
    return e.reshape((m,1)), W.reshape((m,m))
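The `comp_flag == 1` branches swap Monte Carlo sums for tensor-product Gauss-Legendre quadrature; the `quad.gauss_legendre` helper is not shown in this listing. A self-contained two-dimensional sketch of the same C-matrix estimate using numpy's 1-D Gauss-Legendre nodes (the test function is illustrative):

import numpy as np

def gauss_legendre_2d(N):
    # tensor-product Gauss-Legendre rule on [-1, 1]^2, with weights
    # normalized to integrate against the uniform density
    x1, w1 = np.polynomial.legendre.leggauss(N)
    X1, X2 = np.meshgrid(x1, x1, indexing='ij')
    x = np.column_stack((X1.ravel(), X2.ravel()))
    w = np.outer(w1, w1).ravel() / 4.0
    return x, w

def grad_f(x):                       # gradient of f(x) = x1^2 + 0.1*x2^2
    return np.array([2.0*x[0], 0.2*x[1]])

x, w = gauss_legendre_2d(5)
C = np.zeros((2, 2))
for i in range(x.shape[0]):
    DF = grad_f(x[i, :]).reshape((2, 1))
    C += np.dot(DF, DF.T) * w[i]     # C = sum_i w_i * grad f * grad f'

e, W = np.linalg.eigh(C)
print(e[::-1])                       # dominant eigenvalue belongs to x1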