Example #1
def __negloglikgrad(hyp, info):
    """Return gradient of the penalized log likelihood of single demensional
    GP model."""
    R0, dR = __covmat(info['theta'], info['theta'], hyp[:-1], True)
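    # logistic transform of the last hyperparameter keeps the nugget in (0, 1)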
    nug = np.exp(hyp[-1]) / (1 + np.exp(hyp[-1]))
    R = (1 - nug) * R0 + nug * np.eye(info['theta'].shape[0])
    if info['gvar'] is not None:
        R += np.diag(info['gvar'])

    dR = (1 - nug) * dR
    dRappend = nug / (
        (1 + np.exp(hyp[-1]))) * (-R0 + np.eye(info['theta'].shape[0]))
    dR = np.append(dR, dRappend[:, :, None], axis=2)
    W, V = np.linalg.eigh(R)
    Vh = V / np.sqrt(np.abs(W))
    fcenter = Vh.T @ info['g']
    n = info['g'].shape[0]
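    # shrinkage estimate of the scale: the empirical value is pulled toward 1
    # with pseudo-weight 10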
    sig2hat = (n * np.mean(fcenter**2) + 10) / (n + 10)
    dnegloglik = np.zeros(dR.shape[2])
    Rinv = Vh @ Vh.T

    for k in range(0, dR.shape[2]):
        dsig2hat = -np.sum((Vh @ np.multiply.outer(fcenter, fcenter) @ Vh.T) *
                           dR[:, :, k]) / (n + 10)
        dnegloglik[k] += 0.5 * n * dsig2hat / sig2hat
        dnegloglik[k] += 0.5 * np.sum(Rinv * dR[:, :, k])

    dnegloglik += (10**(-8) + hyp - info['hypregmean']) / (
        (info['hypregstd'])**2)
    return dnegloglik
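
A quick way to sanity-check the analytic gradient above is a central-difference comparison against __negloglik (Example #2 below). The sketch that follows is illustrative only: the toy info dictionary and hyperparameter vector are invented, and it assumes __covmat, __negloglik, and __negloglikgrad are defined in the same module.

import numpy as np

rng = np.random.default_rng(0)
n, d = 25, 2
info = {
    'theta': rng.uniform(size=(n, d)),      # toy design points
    'g': rng.standard_normal(n),            # toy centered responses
    'gvar': None,                           # no extra observation variance
    'hypregmean': np.zeros(d + 2),          # prior means for the hyperparameters
    'hypregstd': np.ones(d + 2),            # prior standard deviations
}
hyp = 0.1 * rng.standard_normal(d + 2)      # covariance hyperparameters plus a nugget

grad = __negloglikgrad(hyp, info)
eps = 1e-6
fd = np.array([(__negloglik(hyp + eps * e, info) -
                __negloglik(hyp - eps * e, info)) / (2 * eps)
               for e in np.eye(hyp.size)])
print(np.max(np.abs(grad - fd)))            # should be near zero if consistent
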
Example #2
def __negloglik(hyp, info):
    """Return penalized log likelihood of single demensional GP model."""
    R0 = __covmat(info['theta'], info['theta'], hyp[:-1])
    nug = np.exp(hyp[-1]) / (1 + np.exp(hyp[-1]))
    R = (1 - nug) * R0 + nug * np.eye(info['theta'].shape[0])
    if info['gvar'] is not None:
        R += np.diag(info['gvar'])
    W, V = np.linalg.eigh(R)
    Vh = V / np.sqrt(np.abs(W))
    fcenter = Vh.T @ info['g']
    n = info['g'].shape[0]
    sig2hat = (n * np.mean(fcenter**2) + 10) / (n + 10)
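    # negative profile log-likelihood (up to a constant): 0.5 * log det(R) via
    # the eigenvalues plus 0.5 * n * log of the shrunk scale estimate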
    negloglik = 1 / 2 * np.sum(np.log(np.abs(W))) + 1 / 2 * n * np.log(sig2hat)
    negloglik += 0.5 * np.sum(
        (((10**(-8) + hyp - info['hypregmean'])) / (info['hypregstd']))**2)
    return negloglik
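
Both likelihood routines avoid inverting R directly: with the eigendecomposition R = V diag(W) V.T and Vh = V / sqrt(W), the product Vh @ Vh.T equals R^{-1} and the sum of log-eigenvalues equals log det(R). The short self-contained check below (not from the source) confirms these two identities on a random symmetric positive-definite matrix.

import numpy as np

rng = np.random.default_rng(1)
A = rng.standard_normal((6, 6))
R = A @ A.T + 6 * np.eye(6)                      # a well-conditioned SPD matrix

W, V = np.linalg.eigh(R)
Vh = V / np.sqrt(W)

print(np.allclose(Vh @ Vh.T, np.linalg.inv(R)))                  # True
print(np.allclose(np.sum(np.log(W)), np.linalg.slogdet(R)[1]))   # True
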
Example #3
def predict(predinfo, fitinfo, x, theta, **kwargs):
    r"""
    Finds prediction at theta and x given the dictionary fitinfo.
    This [emulationpredictdocstring] is automatically filled by docinfo.py when
    running updatedocs.py

    Parameters
    ----------
    predinfo : dict
        An arbitrary dictionary where you should place all of your prediction
        information once complete. This dictionary is passed by reference, so
        there is no reason to return anything. Keep only information that will
        be used by predict. Key elements are

            - `predinfo['mean']` : `predinfo['mean'][k]` is the mean of the
              prediction at all x at `theta[k]`.
            - `predinfo['var']` : `predinfo['var'][k]` is the variance of the
              prediction at all x at `theta[k]`.
            - `predinfo['cov']` : `predinfo['cov'][k]` is the covariance of the
              prediction at all x at `theta[k]`.
            - `predinfo['covhalf']` : if `A = predinfo['covhalf'][k]` then
              `A.T @ A = predinfo['cov'][k]`.

    fitinfo : dict
        An arbitrary dictionary where you placed all your important fitting
        information from the fit function above.

    x : array of objects
        A matrix (vector) of inputs for prediction.

    theta : array of objects
        A matrix (vector) of parameters at which to predict.

    kwargs : dict
        A dictionary containing options passed to you.
    """
    # parse optional keyword arguments
    return_grad = kwargs.get('return_grad', False) is True
    return_covx = kwargs.get('return_covx', True) is not False
    infos = fitinfo['emulist']
    predvecs = np.zeros((theta.shape[0], len(infos)))
    predvars = np.zeros((theta.shape[0], len(infos)))

    if return_grad:
        predvecs_gradtheta = np.zeros(
            (theta.shape[0], len(infos), theta.shape[1]))
        predvars_gradtheta = np.zeros(
            (theta.shape[0], len(infos), theta.shape[1]))
        drsave = np.array(np.ones(len(infos)), dtype=object)
    if predvecs.ndim < 1.5:
        predvecs = predvecs.reshape((1, -1))
        predvars = predvars.reshape((1, -1))
    try:
        # fast path: x matches the x used during fitting, so indices map directly
        if x is None or np.all(np.equal(x, fitinfo['x'])) or \
                np.allclose(x, fitinfo['x']):
            xind = np.arange(0, x.shape[0])
            xnewind = np.arange(0, x.shape[0])
        else:
            raise ValueError('x does not match the fitted x')
    except Exception:
        # otherwise match rows of x to rows of fitinfo['x'], column by column
        matchingmatrix = np.ones((x.shape[0], fitinfo['x'].shape[0]))
        for k in range(0, x[0].shape[0]):
            try:
                matchingmatrix *= np.isclose(x[:, k][:, None], fitinfo['x'][:,
                                                                            k])
            except Exception:
                matchingmatrix *= np.equal(x[:, k][:, None], fitinfo['x'][:,
                                                                          k])
        xind = np.argwhere(matchingmatrix > 0.5)[:, 1]
        xnewind = np.argwhere(matchingmatrix > 0.5)[:, 0]

    rsave = np.array(np.ones(len(infos)), dtype=object)

    # loop over principal components
    for k in range(0, len(infos)):
        if infos[k]['hypind'] == k:
            # covariance matrix between new theta and thetas from fit.
            if return_grad:
                rsave[k], drsave[k] = __covmat(theta,
                                               fitinfo['theta'],
                                               infos[k]['hypcov'],
                                               return_gradx1=True)
            else:
                rsave[k] = __covmat(theta, fitinfo['theta'],
                                    infos[k]['hypcov'])
        # adjusted covariance matrix
        r = (1 - infos[k]['nug']) * np.squeeze(rsave[infos[k]['hypind']])

        try:
            rVh = r @ infos[k]['Vh']
            rVh2 = rVh @ (infos[k]['Vh']).T
        except Exception:
            for i in range(0, len(infos)):
                print((i, infos[i]['hypind']))
            raise ValueError('Something went wrong with fitted components')

        if rVh.ndim < 1.5:
            rVh = rVh.reshape((1, -1))
        if rVh2.ndim < 1.5:
            rVh2 = np.reshape(rVh2, (1, -1))
        predvecs[:, k] = r @ infos[k]['pw']
        if return_grad:
            drsave_hypind = np.squeeze(drsave[infos[k]['hypind']])
            if drsave_hypind.ndim < 2.5 and theta.shape[1] < 1.5:
                drsave_hypind = np.reshape(drsave_hypind,
                                           (*drsave_hypind.shape, 1))
            elif drsave_hypind.ndim < 2.5 and theta.shape[1] > 1.5:
                drsave_hypind = np.reshape(drsave_hypind,
                                           (1, *drsave_hypind.shape))

            dr = (1 - infos[k]['nug']) * drsave_hypind
            if dr.ndim == 2:
                drVh = dr.T @ infos[k]['Vh']
                predvecs_gradtheta[:, k, :] = dr.T @ infos[k]['pw']
                predvars_gradtheta[:, k, :] = \
                    -infos[k]['sig2'] * 2 * np.sum(rVh * drVh, 1)
            else:
                drpw = np.squeeze(dr.transpose(0, 2, 1) @ infos[k]['pw'])
                if drpw.ndim < 1.5 and theta.shape[1] < 1.5:
                    drpw = np.reshape(drpw, (-1, 1))
                elif drpw.ndim < 1.5 and theta.shape[1] > 1.5:
                    drpw = np.reshape(drpw, (1, -1))

                predvecs_gradtheta[:, k, :] = (1 - infos[k]['nug']) * drpw
                predvars_gradtheta[:, k, :] = \
                    -(infos[k]['sig2'] * 2) * np.einsum("ij,ijk->ik", rVh2, dr)
        predvars[:, k] = infos[k]['sig2'] * np.abs(1 - np.sum(rVh**2, 1))

    # calculate predictive mean and variance
    predinfo['mean'] = np.full((x.shape[0], theta.shape[0]), np.nan)
    predinfo['var'] = np.full((x.shape[0], theta.shape[0]), np.nan)
    pctscale = (fitinfo['pct'].T * fitinfo['scale']).T
    predinfo['mean'][xnewind, :] = ((predvecs @ pctscale[xind, :].T) +
                                    fitinfo['offset'][xind]).T

    predinfo['var'][xnewind, :] = ((fitinfo['extravar'][xind] +
                                    predvars @ (pctscale[xind, :]**2).T)).T
    predinfo['extravar'] = 1 * fitinfo['extravar'][xind]
    predinfo['predvars'] = 1 * predvars
    predinfo['predvecs'] = 1 * predvecs
    predinfo['phi'] = 1 * pctscale[xind, :]

    if return_covx:
        CH = (np.sqrt(predvars)[:, :, None] *
              (pctscale[xind, :].T)[None, :, :])

        predinfo['covxhalf'] = np.full(
            (theta.shape[0], CH.shape[1], x.shape[0]), np.nan)
        predinfo['covxhalf'][:, :, xnewind] = CH
        predinfo['covxhalf'] = predinfo['covxhalf'].transpose((2, 0, 1))

    if return_grad:
        predinfo['mean_gradtheta'] = np.full(
            (x.shape[0], theta.shape[0], theta.shape[1]), np.nan)
        predinfo['mean_gradtheta'][xnewind, :, :] = \
            ((predvecs_gradtheta.transpose(0, 2, 1) @
              pctscale[xind, :].T)).transpose((2, 0, 1))
        predinfo['predvars_gradtheta'] = 1 * predvars_gradtheta
        predinfo['predvecs_gradtheta'] = 1 * predvecs_gradtheta

        if return_covx:

            dsqrtpredvars = 0.5 * (predvars_gradtheta.transpose(2, 0, 1) /
                                   np.sqrt(predvars)).transpose(1, 2, 0)

            if np.allclose(xnewind, xind):
                predinfo['covxhalf_gradtheta'] = \
                    (dsqrtpredvars.transpose(2, 0, 1)[:, :, :, None] *
                     (pctscale[xind, :].T)[None, :, :]).transpose(3, 1, 2, 0)
            else:
                predinfo['covxhalf_gradtheta'] = np.full(
                    (x.shape[0], theta.shape[0], CH.shape[1], theta.shape[1]),
                    np.nan)
                predinfo['covxhalf_gradtheta'][xnewind] = \
                    (dsqrtpredvars.transpose(2, 0, 1)[:, :, :, None] *
                     (pctscale[xind, :].T)[None, :, :]).transpose(3, 1, 2, 0)
    return
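
A hedged usage sketch for the function above. It assumes fitinfo was produced by the module's companion fit function, and that x and thetanew are arrays of the same kind used at fit time; the variable names are illustrative only.

predinfo = {}
predict(predinfo, fitinfo, x, thetanew, return_grad=True, return_covx=False)

mu = predinfo['mean']              # shape (x.shape[0], thetanew.shape[0])
var = predinfo['var']              # pointwise predictive variances, same shape
dmu = predinfo['mean_gradtheta']   # gradients w.r.t. theta (only with return_grad=True)
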
Example #4
def __fitGP1d(theta,
              g,
              hyp1,
              hyp2,
              gvar=None,
              hypstarts=None,
              hypinds=None,
              prevsubmodel=None):
    """Return a fitted model from the emulator model using smart method."""

    subinfo = {}
    subinfo['hypregmean'] = np.append(
        0 + 0.5 * np.log(theta.shape[1]) + np.log(np.std(theta, 0)), (0, hyp1))
    subinfo['hypregLB'] = np.append(
        -4 + 0.5 * np.log(theta.shape[1]) + np.log(np.std(theta, 0)),
        (-12, hyp2))

    subinfo['hypregUB'] = np.append(
        4 + 0.5 * np.log(theta.shape[1]) + np.log(np.std(theta, 0)), (2, 0))
    subinfo['hypregstd'] = (subinfo['hypregUB'] - subinfo['hypregLB']) / 8
    subinfo['hypregstd'][-2] = 2
    subinfo['hypregstd'][-1] = 4
    subinfo['hyp'] = 1 * subinfo['hypregmean']
    nhyptrain = np.min((20 * theta.shape[1], theta.shape[0]))
    if theta.shape[0] > nhyptrain:
        thetac = np.random.choice(theta.shape[0], nhyptrain, replace=False)
    else:
        thetac = range(0, theta.shape[0])
    subinfo['theta'] = theta[thetac, :]
    subinfo['g'] = g[thetac]
    subinfo['gvar'] = gvar[thetac] if gvar is not None else None
    hypind0 = -1

    L0 = __negloglik(subinfo['hyp'], subinfo)
    if hypstarts is not None:
        for k in range(0, hypstarts.shape[0]):
            L1 = __negloglik(hypstarts[k, :], subinfo)
            if L1 < L0:
                subinfo['hyp'] = hypstarts[k, :]
                L0 = 1 * L1
                hypind0 = hypinds[k]

    if hypind0 > -0.5 and hypstarts.ndim > 1:
        dL = __negloglikgrad(subinfo['hyp'], subinfo)
        scalL = np.std(hypstarts, 0) * hypstarts.shape[0] / \
            (1 + hypstarts.shape[0]) + (1 / (1 + hypstarts.shape[0]) * subinfo['hypregstd'])
        if np.sum((dL * scalL) ** 2) < 1.25 * \
                (subinfo['hyp'].shape[0] + 5 * np.sqrt(subinfo['hyp'].shape[0])):
            skipop = True
        else:
            skipop = False
    else:
        skipop = False

    if (not skipop):

        def scaledlik(hypv):
            hyprs = subinfo['hypregmean'] + hypv * subinfo['hypregstd']
            return __negloglik(hyprs, subinfo)

        def scaledlikgrad(hypv):
            hyprs = subinfo['hypregmean'] + hypv * subinfo['hypregstd']
            return __negloglikgrad(hyprs, subinfo) * subinfo['hypregstd']

        newLB = (subinfo['hypregLB'] -
                 subinfo['hypregmean']) / subinfo['hypregstd']
        newUB = (subinfo['hypregUB'] -
                 subinfo['hypregmean']) / subinfo['hypregstd']

        newhyp0 = (subinfo['hyp'] -
                   subinfo['hypregmean']) / subinfo['hypregstd']

        opval = spo.minimize(scaledlik,
                             newhyp0,
                             method='L-BFGS-B',
                             options={'gtol': 0.1},
                             jac=scaledlikgrad,
                             bounds=spo.Bounds(newLB, newUB))

        hypn = subinfo['hypregmean'] + opval.x * subinfo['hypregstd']
        likdiff = (L0 - __negloglik(hypn, subinfo))
    else:
        likdiff = 0
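    # reuse a previously fitted component's hyperparameters when re-optimizing
    # improves the likelihood by less than a dimension-based threshold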
    if hypind0 > -0.5 and (2 * likdiff) < 1.25 * \
            (subinfo['hyp'].shape[0] + 5 * np.sqrt(subinfo['hyp'].shape[0])):
        subinfo['hypcov'] = subinfo['hyp'][:-1]
        subinfo['hypind'] = hypind0
        subinfo['nug'] = np.exp(
            subinfo['hyp'][-1]) / (1 + np.exp(subinfo['hyp'][-1]))

        R = __covmat(theta, theta, subinfo['hypcov'])

        subinfo['R'] = (1 - subinfo['nug']) * R + subinfo['nug'] * np.eye(
            R.shape[0])
        if gvar is not None:
            subinfo['R'] += np.diag(gvar)

        W, V = np.linalg.eigh(subinfo['R'])
        Vh = V / np.sqrt(np.abs(W))
        fcenter = Vh.T @ g
        subinfo['Vh'] = Vh
        n = subinfo['R'].shape[0]
        subinfo['sig2'] = (np.mean(fcenter**2) * n + 1) / (n + 1)
        subinfo['Rinv'] = V @ np.diag(1 / W) @ V.T
    else:
        subinfo['hyp'] = hypn
        subinfo['hypind'] = -1
        subinfo['hypcov'] = subinfo['hyp'][:-1]
        subinfo['nug'] = np.exp(
            subinfo['hyp'][-1]) / (1 + np.exp(subinfo['hyp'][-1]))

        R = __covmat(theta, theta, subinfo['hypcov'])
        subinfo['R'] = (1 - subinfo['nug']) * R + subinfo['nug'] * np.eye(
            R.shape[0])
        if gvar is not None:
            subinfo['R'] += np.diag(gvar)
        n = subinfo['R'].shape[0]
        W, V = np.linalg.eigh(subinfo['R'])
        Vh = V / np.sqrt(np.abs(W))
        fcenter = Vh.T @ g
        subinfo['sig2'] = (np.mean(fcenter**2) * n + 1) / (n + 1)
        subinfo['Rinv'] = Vh @ Vh.T
        subinfo['Vh'] = Vh
    subinfo['pw'] = subinfo['Rinv'] @ g
    return subinfo
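
The L-BFGS-B call above works in standardized coordinates hypv = (hyp - hypregmean) / hypregstd, so the optimizer sees comparably scaled variables, and the optimum is then mapped back to the original scale. The toy example below (not from the source; the objective, means, and scales are invented) shows the same pattern in isolation.

import numpy as np
import scipy.optimize as spo

mean = np.array([0.0, 5.0])
std = np.array([1.0, 100.0])                  # deliberately different natural scales

def objective(hyp):                           # stand-in for __negloglik
    return np.sum(((hyp - np.array([2.0, -300.0])) / std) ** 2)

def scaled(hypv):                             # objective in standardized coordinates
    return objective(mean + hypv * std)

opval = spo.minimize(scaled, np.zeros(2), method='L-BFGS-B',
                     bounds=spo.Bounds(-5 * np.ones(2), 5 * np.ones(2)))
hypbest = mean + opval.x * std
print(hypbest)                                # close to [2, -300]
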
Example #5
def supplementtheta(fitinfo, size, theta, thetachoices, choicecosts, cal,
                    **kwargs):
    r'''
    Suggests next parameters and obviates pending parameters for value
    retrieval of `f`.

    Parameters
    ----------
    fitinfo : dict
        An arbitrary dictionary where you placed all your important fitting
        information from the fit function above.
    size : integer
        The number of new thetas the user wants.
    theta : array
        An array of theta values where you want to predict.
    thetachoices : array
        An array of thetas to choose from.
    choicecosts : array
        The computation cost of each theta choice given to you.
    cal : instance of emulator class
        An emulator class instance as defined in calibration.
        This will not always be provided.
    **kwargs : dict
        A dictionary containing additional options.  Specific arguments:
            - `'pending'`: a matrix (sized like `f`) to indicate pending value retrieval of `f`
            - `'costpending'`: the cost to obviate pending thetas
            - `'includepending'`: boolean to include pending values for obviation considerations
        Example usage: `kwargs = {'includepending': True, 'costpending': 0.01+0.99*np.mean(pending,0),
                                  'pending': pending}`.

    Returns
    -------
    Note that we should have `theta.shape[0] * x.shape[0] < size`.
    theta : array
        Suggested parameters for further value retrievals of `f`.
    info : dict
        A dictionary to contain selection and obviation information. Contains arguments:
            - `'crit'`: criteria associated with selected thetas
            - `'obviatesugg'`: indices in `pending` for suggested obviations
    '''
    pending = None
    if ('pending' in kwargs.keys()):
        pending = kwargs['pending'].T
        pendvar = __getnewvar(fitinfo, pending)

    if ('includepending'
            in kwargs.keys()) and (kwargs['includepending'] is True):
        includepending = True
        if ('costpending' in kwargs.keys()):
            costpending = kwargs['costpending'] * \
                          np.ones(fitinfo['theta'].shape[0])
        else:
            costpending = np.mean(choicecosts) * \
                          np.ones(fitinfo['theta'].shape[0])
    else:
        includepending = False
    if theta is None:
        raise ValueError('this method is designed to take in the '
                         'theta values.')

    infos = copy.copy(fitinfo['emulist'])
    thetaold = copy.copy(fitinfo['theta'])
    varpca = copy.copy(fitinfo['pcstdvar'])
    thetaposs = thetachoices

    rsave = np.array(np.ones(len(infos)), dtype=object)
    rposssave = np.array(np.ones(len(infos)), dtype=object)
    rnewsave = np.array(np.ones(len(infos)), dtype=object)
    R = np.array(np.ones(len(infos)), dtype=object)

    crit = np.zeros(thetaposs.shape[0])
    weightma = np.mean(fitinfo['pct']**2, 0)

    # covariance matrices between new thetas, thetachoices, and thetas in fit.
    for k in range(0, len(infos)):
        if infos[k]['hypind'] == k:
            rsave[k] = (1 - infos[k]['nug']) * __covmat(
                theta, thetaold, infos[k]['hypcov'])
            rposssave[k] = (1 - infos[k]['nug']) * __covmat(
                thetaposs, thetaold, infos[k]['hypcov'])
            rnewsave[k] = (1 - infos[k]['nug']) * __covmat(
                thetaposs, theta, infos[k]['hypcov'])
            R[k] = __covmat(thetaold, thetaold, infos[k]['hypcov'])
            R[k] = (1 - infos[k]['nug']) * R[k] + np.eye(R[k].shape[0]) * \
                infos[k]['nug']

    critsave = np.zeros(thetaposs.shape[0])
    critcount = np.zeros((crit.shape[0], len(infos)))

    if pending is None:
        varpcause = 1 * varpca
    else:
        varpcause = 1 * pendvar

    # calculation of selection criterion
    thetachoicesave = np.zeros((size, fitinfo['theta'].shape[1]))
    for j in range(0, size):
        critcount = np.zeros((crit.shape[0], len(infos)))
        if thetaposs.shape[0] < 1.5:
            thetaold = np.vstack((thetaold, thetaposs))
            break
        for k in range(0, len(infos)):
            if infos[k]['hypind'] == k:  # this is to speed things up a bit...
                Rh = R[infos[k]['hypind']] + np.diag(varpcause[:, k])
                p = rnewsave[infos[k]['hypind']]
                term1 = np.linalg.solve(Rh, rposssave[infos[k]['hypind']].T)
                q = rsave[infos[k]['hypind']] @ term1
                r = rposssave[infos[k]['hypind']].T * term1
                critcount[:, k] = weightma[k] * np.mean((p.T - q) ** 2, 0) / \
                    np.abs(1 - np.sum(r, 0))
            else:
                critcount[:, k] = weightma[k] / weightma[infos[k]['hypind']] * \
                                  critcount[:, infos[k]['hypind']]
        crit = np.sum(critcount, 1)
        jstar = np.argmax(crit / choicecosts)
        critsave[j] = crit[jstar] / choicecosts[jstar]
        thetaold = np.vstack((thetaold, thetaposs[jstar]))
        thetachoicesave[j] = thetaposs[jstar]
        thetaposs = np.delete(thetaposs, jstar, 0)
        for k in range(0, len(infos)):
            if infos[k]['hypind'] == k:
                R[k] = np.vstack((R[k], rposssave[k][jstar, :]))
                R[k] = np.vstack((R[k].T, np.append(rposssave[k][jstar, :],
                                                    1))).T
                newr = (1 - infos[k]['nug']) * \
                    __covmat(thetaposs, thetaold[-1, :], infos[k]['hypcov'])
                rposssave[k] = np.delete(rposssave[k], jstar, 0)
                rposssave[k] = np.hstack((rposssave[k], newr))
                rsave[k] = np.hstack((rsave[k], rnewsave[k][jstar, :][:,
                                                                      None]))
                rnewsave[k] = np.delete(rnewsave[k], jstar, 0)
        crit = np.delete(crit, jstar)
        critcount = np.delete(critcount, jstar, 0)
        choicecosts = np.delete(choicecosts, jstar)
        varpcause = np.vstack((varpcause, 0 * varpca[0, :]))
        varpca = np.vstack((varpca, 0 * varpca[0, :]))

        for k in range(0, len(infos)):
            if infos[k]['hypind'] == k:
                rsave[k] = (1 - infos[k]['nug']) * __covmat(
                    theta, thetaold, infos[k]['hypcov'])
                R[k] = __covmat(thetaold, thetaold, infos[k]['hypcov'])
                R[k] = (1 - infos[k]['nug']) * R[k] + np.eye(R[k].shape[0]) * \
                    infos[k]['nug']

    # calculation of obviation criterion and suggests obviations.
    info = {}
    info['crit'] = critsave
    if includepending:
        critpend = np.zeros((fitinfo['theta'].shape[0], len(infos)))
        for k in range(0, len(infos)):
            if infos[k]['hypind'] == k:  # this is to speed things up a bit...
                Rh = R[infos[k]['hypind']] + np.diag(varpca[:, k])
                term1 = np.linalg.solve(Rh, rsave[infos[k]['hypind']].T)
                delta = (pendvar[:, k] - varpca[:fitinfo['theta'].shape[0], k])
                term3 = np.diag(np.linalg.inv(Rh))[:fitinfo['theta'].shape[0]]
                critpend[:, k] = -weightma[k] * delta * \
                    np.mean((term1[:fitinfo['theta'].shape[0], :] ** 2), 1) / (1 + delta * term3)
            else:
                critpend[:, k] = weightma[k] / weightma[infos[k]['hypind']] * \
                                 critpend[:, infos[k]['hypind']]
        critpend = np.sum(critpend, 1)
        info['obviatesugg'] = np.where(
            np.any(pending, 1) *
            (np.mean(critsave[:size]) > critpend / costpending) > 0.5)[0]
    return thetachoicesave, info
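
A hedged usage sketch mirroring the kwargs example in the docstring. Here fitinfo and cal come from the module's fit/calibration workflow, while thetagrid, choices, costs, and the boolean matrix pending are placeholders for user-supplied arrays.

newtheta, info = supplementtheta(fitinfo, 10, thetagrid, choices, costs, cal,
                                 includepending=True,
                                 costpending=0.01 + 0.99 * np.mean(pending, 0),
                                 pending=pending)
print(info['crit'][:10])           # selection criteria for the chosen thetas
print(info['obviatesugg'])         # indices suggested for obviation
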