def emin(K):
    """Calculate E[min(L(T), K)] in the LHP model.

    Reads the module-level names ``Q``, ``t``, ``beta`` and ``R`` and
    combines a bivariate normal integral from ``mvn.mvndst`` with a
    univariate normal CDF term.

    Parameters
    ----------
    K : float
        Cap applied to the loss; ``K / (1 - R)`` is passed to ``norm.ppf``.

    Returns
    -------
    float
        ``(1 - R) * <bivariate integral> + K * norm.cdf(A)``.
    """
    C = norm.ppf(1 - Q(t))
    A = (1 / beta) * (C - sqrt(1 - beta * beta) * norm.ppf(K / (1 - R)))

    # infin == 0 in both dimensions: integrate from -infinity up to ``upper``,
    # so the ``lower`` values are placeholders only.
    bivariate = mvn.mvndst(lower=[0, 0],
                           upper=[C, -1 * A],
                           infin=[0, 0],
                           correl=-1 * beta)[1]
    capped_part = K * norm.cdf(A)
    return (1 - R) * bivariate + capped_part
def _cbnd(a, b, rho):
    """Bivariate standard normal integral over (-inf, a] x (-inf, b].

    Delegates to the Genz multivariate normal routine ``mvn.mvndst``.

    Parameters
    ----------
    a, b : float
        Upper integration limits of the two dimensions.
    rho : float
        Correlation coefficient handed to ``mvndst``.

    Returns
    -------
    float
        The integral value reported by ``mvndst`` (second item of its result).
    """
    upper_limits = np.array([a, b])
    # infin == 0 means "-infinity to upper", so these zeros are never used
    # as actual bounds.
    lower_limits = np.array([0, 0])
    limit_flags = np.array([0, 0])
    result = mvn.mvndst(lower_limits, upper_limits, limit_flags, rho)
    return result[1]
Beispiel #3
0
def probWolfe(t, gp, c1=0.05, c2=0.8, strong_wolfe=True):
    """Probability that step size ``t`` fulfils the probabilistic Wolfe conditions.

    Two quantities are modelled as jointly Gaussian:

    * Armijo:    ``a = f(0) - f(t) + c1 * t * f'(0)``
    * curvature: ``b = f'(t) - c2 * f'(0)``

    Their means and (co)variances are assembled from ``gp`` and the
    probability of ``a >= 0`` and ``b >= 0`` is evaluated with the bivariate
    normal integral ``mvn.mvndst``.

    Parameters
    ----------
    t : float
        Candidate step size.
    gp : object
        Must provide ``m(x)``, ``d1m(x)``, ``V(x, y)``, ``Vd(x, y)``,
        ``dV(x, y)`` and ``dVd(x, y)``.  Presumably these are the posterior
        mean, the mean of the derivative and the value/derivative
        covariances of a Gaussian process -- confirm against the gp class.
    c1, c2 : float
        Armijo and curvature constants.
    strong_wolfe : bool
        If true, the curvature quantity is additionally bounded from above
        by ``2 * c2 * (|f'(0)| + 2 * sqrt(dVd(0, 0)))`` (strong condition);
        otherwise it is unbounded above (weak condition).

    Returns
    -------
    float
        Probability in ``[0, 1]``.  ``0.0`` is returned when a variance is
        negative, or when exactly one of them is zero.
    """
    m0 = gp.m(0.0)
    d1m0 = gp.d1m(0.0)
    V00 = gp.V(0.0, 0.0)
    Vd00 = gp.Vd(0.0, 0.0)
    dVd00 = gp.dVd(0.0, 0.0)

    # mean
    mt = gp.m(t)
    d1mt = gp.d1m(t)
    ma = m0 - mt + c1 * t * d1m0  # armijo rule
    mb = d1mt - c2 * d1m0  # curvature condition

    # cov
    dV0t = gp.dV(0.0, t)
    dVd0t = gp.dVd(0.0, t)

    Caa = (V00 + ((c1 * t)**2) * dVd00 + gp.V(t, t)
           + 2 * (c1 * t * (Vd00 - dV0t) - gp.V(0.0, t)))
    Cbb = (c2**2) * dVd00 - 2 * c2 * dVd0t + gp.dVd(t, t)

    if Caa < 0 or Cbb < 0:  # undefined
        return 0.0

    if Caa < 1e-9 and Cbb < 1e-9:  # near deterministic case
        return 1.0 if ma >= 0 and mb >= 0 else 0.0

    # One marginal is exactly degenerate while the other is not: the
    # standardisation below would divide by zero.  Return 0, matching the
    # ``<= 0`` guard used by the closure variant of this routine.
    if Caa <= 0 or Cbb <= 0:
        return 0.0

    # Cov(a, b).  The c1 * t * dVd0t term carries the factor ``t`` because
    # ``a`` contains c1 * t * f'(0) and ``b`` contains f'(t).
    Cab = (-c2 * (Vd00 + c1 * t * dVd00) + c2 * dV0t + gp.dV(t, 0.0)
           + c1 * t * dVd0t - gp.Vd(t, t))

    # evaluate the integral
    std_a = np.sqrt(Caa)
    std_b = np.sqrt(Cbb)
    lower = [-ma / std_a, -mb / std_b]
    if strong_wolfe:
        # strong condition: curvature quantity is also bounded from above
        b_ = (2 * c2 * (np.abs(d1m0) + 2 * np.sqrt(dVd00)) - mb) / std_b
        upper = [np.inf, b_]
        infin = np.array([1, 2])
    else:
        upper = [np.inf, np.inf]
        infin = np.array([1, 1])
    rho = Cab / np.sqrt(Caa * Cbb)

    # using mvndst from scipy which is undocumented, but from the fortran doc:
    # first argument are lower bounds (n dimensional vector)
    # second are upper bounds
    # third is an indicator vector infin, where
    #       if infin[d]  < 0, integration is done from -infinity to infinity
    #       if infin[d] == 0, integration is done from -infinity to upper[d]
    #       if infin[d] == 1, integration is done from  lower[d] to infinity
    #       if infin[d] == 2, integration is done from  lower[d] to upper[d]
    # fourth is an array with correlation coefficients (off diagonal
    # covariances)
    # the function returns the value of the integral of a multivariate normal
    # density function, with mean zero and covariance with diagonal elements
    # normalized to 1
    ret = mvn.mvndst(lower, upper, infin, np.array([rho]))
    return ret[1]
    def probWolfe(t):  # probability for Wolfe conditions to be fulfilled
        """Return ``(p, p12)`` for step size ``t``.

        ``p`` is the joint probability that the Armijo and the (upper
        bounded) curvature condition hold.  ``p12`` holds, for debugging,
        the two marginal probabilities and the correlation; it is ``None``
        in the deterministic case and all zeros when a variance is
        non-positive.

        All model quantities (``m0``, ``dm0``, ``V0``, ``m``, ``V`` ...) are
        taken from the enclosing scope, which is not part of this block.
        """
        # marginal for Armijo condition
        ma = m0 - m(t) + c1 * t * dm0
        Vaa = V0 + (c1 * t)**2 * dVd0 + V(t) + 2 * (c1 * t *
                                                    (Vd0 - Vd0f(t)) - V0f(t))

        # marginal for curvature condition
        mb = d1m(t) - c2 * dm0
        Vbb = c2**2 * dVd0 - 2 * c2 * Vd0df(t) + dVd(t)

        # covariance between conditions
        Vab = -c2 * (Vd0 + c1 * t * dVd0) + V0df(
            t) + c2 * Vd0f(t) + c1 * t * Vd0df(t) - Vd(t)

        if (Vaa < 1e-9) and (Vbb < 1e-9):  # deterministic evaluations
            p = np.int32(ma >= 0) * np.int32(mb >= 0)
            return p, None

        # joint probability
        if Vaa <= 0 or Vbb <= 0:
            return 0, np.array([0, 0, 0])

        rho = Vab / np.sqrt(Vaa * Vbb)
        lower_a = -ma / np.sqrt(Vaa)
        lower_b = -mb / np.sqrt(Vbb)

        # Standardised upper bound of the curvature quantity:
        # (b_bar - mb) / sqrt(Vbb) with b_bar = 2*c2*(|dm0| + 2*sqrt(dVd0)).
        # The factor 2*c2 belongs to b_bar only, not to the whole quotient.
        upper = (2 * c2 * (np.abs(dm0) + 2 * np.sqrt(dVd0)) - mb) / np.sqrt(Vbb)

        # infin = [1, 2]: first dimension [lower, inf), second [lower, upper]
        _, p, _ = mvn.mvndst(
            np.array([lower_a, lower_b]),
            np.array([np.inf, upper]), np.array([1, 2]), rho)

        # individual marginal probabilities for each condition
        # (for debugging)
        p12 = np.array([
            1 - GaussCDF(lower_a),
            GaussCDF(upper) - GaussCDF(lower_b),
            rho
        ])

        return p, p12
Beispiel #5
0
def prob_to_correlation(prob, s, t):  # s = mu_1/sigma_1, t = mu_2/sigma_2
    """Find the correlation whose bivariate normal probability equals ``prob``.

    Bisects ``rho`` over ``[-1, 1]`` until the bracket is narrower than
    1e-5.  For each trial value the integral of the standard bivariate
    normal over ``[-s, inf) x [-t, inf)`` is computed with ``mvn.mvndst``.
    The search assumes this probability is increasing in ``rho``.

    Parameters
    ----------
    prob : float
        Target probability.
    s, t : float
        Standardised means; the integration starts at ``-s`` and ``-t``.

    Returns
    -------
    float
        The trial ``rho`` if it reproduces ``prob`` exactly, otherwise the
        left end of the final bracket.
    """
    epsilon = 0.00001
    lower = np.array([-s, -t])
    upper = np.array([0, 0])  # dummy: infin == 1 integrates lower -> +inf
    infin = np.array([1, 1])
    rho_left = -1
    rho_right = 1
    while rho_right - rho_left > epsilon:
        # 2.0 keeps this a true division even under Python 2, where the
        # integer bounds would otherwise make the midpoint stick at 0.
        rho = (rho_left + rho_right) / 2.0
        error, value, inform = mvn.mvndst(lower, upper, infin, np.array([rho]))
        if value < prob:
            rho_left = rho
        elif value > prob:
            rho_right = rho
        else:
            # Exact hit: return the solution itself, not the stale left
            # bracket (which may still be the initial -1).
            return rho

    return rho_left
 def func1d(upper1d):
     '''
     Evaluate the multivariate normal cumulative distribution
     function for a single sample.

     ``upper1d`` supplies the upper limits; ``lower``, ``limit_flags`` and
     ``self.theta`` come from the enclosing scope.
     '''
     outcome = mvn.mvndst(lower, upper1d, limit_flags, self.theta)
     return outcome[1]
Beispiel #7
0
def correlation_to_prob(rho, s, t):
    """Bivariate standard normal probability over [-s, inf) x [-t, inf).

    Parameters
    ----------
    rho : float
        Correlation coefficient passed to ``mvn.mvndst``.
    s, t : float
        The integration starts at ``-s`` and ``-t`` respectively.

    Returns
    -------
    float
        Integral value reported by ``mvndst``.
    """
    start_points = np.array([-s, -t])
    limit_flags = np.array([1, 1])  # 1: integrate from lower to +infinity
    unused_upper = np.array([0, 0])  # ignored because of the flags above
    result = mvn.mvndst(start_points, unused_upper, limit_flags,
                        np.array([rho]))
    return result[1]
Beispiel #8
0
def compute_ioannis_capital():
    """Monte Carlo loss quantiles for ``PortfolioA.csv`` with sovereign contagion.

    Steps:

    1. Read the factor correlation matrix and the portfolio from the
       hard-coded CSV locations.
    2. Simulate ``N = 10**6`` standard normal asset returns per counterparty
       from correlated systematic factors plus an idiosyncratic term.
    3. For every non-sovereign (``SOV_ID == 0``) solve for two default
       thresholds: ``Dsd`` (used when the sovereign defaults) and ``Dnsd``
       (used otherwise).  Sovereigns keep the PD-implied threshold.
    4. In scenarios where the counterparty with ``SOV_ID == 1`` defaults,
       compare returns with ``Dsd``; in all other scenarios with ``Dnsd``.

    The elapsed wall-clock time is printed.

    Returns
    -------
    numpy.ndarray
        The 99, 99.5, 99.9 and 99.99 percentiles of the simulated loss.
    """
    start = time.time()

    # Read the correlation matrix and the portfolio from .csv
    Omega = pd.read_csv(r'C:\Users\Javier\Documents\MEGA\Thesis\CDS_data\factor_model\Ioannis\Omega.csv', index_col = 0,header = 0)
    Portfolio = pd.read_csv(r'C:\Users\Javier\Documents\MEGA\Thesis\CDS_data\factor_model\Ioannis\PortfolioA.csv')

    m = len(Portfolio)    # number of counterparties in the portfolio
    N = 10**6             # number of simulations
    p = Omega.shape[0]    # number of systematic factors

    # Now we get the beta, gamma (PDGSD), PD, EAD and LGD
    Beta = Portfolio[[col for col in list(Portfolio) if col.startswith('beta')]].values
    gamma = Portfolio['gamma'].values
    PD = Portfolio['PD'].values
    EAD = Portfolio['EAD'].values
    LGD = Portfolio['LGD'].values
    SOV_ID = Portfolio['SOV_ID'].values
    SOV_LINK = Portfolio['SOV_LINK'].values

    # Calibrate default thresholds with PDs
    d = norm.ppf(PD)

    # perform a Cholesky factorisation to sample normal distributed
    # numbers with covariance matrix Omega
    chol = np.linalg.cholesky(Omega)

    # generate independent normals and convert them to correlated factors
    Z = np.random.standard_normal((p, N))
    F = np.dot(chol, Z)

    # idiosyncratic loading s.t. the returns are standard normal; only the
    # diagonal is needed, so take it before the square root
    id_load = np.sqrt(1 - np.diagonal(np.dot(np.dot(Beta, Omega), Beta.T)))
    epsilon = np.random.standard_normal((N, m))

    # Put everything together to get the returns, shape (m, N)
    X = np.dot(Beta, F) + (id_load * epsilon).T

    # Thresholds with contagion.  Start from the PD-implied threshold and
    # overwrite it for non-sovereigns where the solved values are accepted.
    Dsd = d.copy()
    Dnsd = d.copy()
    for i in range(m):
        if SOV_ID[i] != 0:
            continue

        sov_ind = np.nonzero(SOV_ID == SOV_LINK[i])[0][0]
        PDs = PD[sov_ind]
        d_sov = norm.ppf(PDs)
        corr = np.dot(np.dot((Beta[i]).T, Omega), (Beta[sov_ind]))

        # fsolve hands over a length-1 array; pass its scalar entry on so
        # the bound lists given to mvndst stay flat.
        def Fsd(x):
            joint = mvn.mvndst([-100, -100], [x[0], d_sov], [0, 0], corr)[1]
            return joint / PDs - gamma[i]

        def Fnsd(x):
            joint = mvn.mvndst([-100, d_sov], [x[0], 100], [0, 1], corr)[1]
            return joint - PD[i] + gamma[i] * PDs

        dsd_i = fsolve(Fsd, norm.ppf(gamma[i]))[0]  # is there a better initial guess?
        dnsd_i = fsolve(Fnsd, d[i])[0]  # is there a better initial guess?

        # Keep the PD-implied threshold when the solved one is lower than it
        # or when the counterparty's PD is below the sovereign's.
        if dsd_i < d[i] or PD[i] < PDs:
            continue
        Dsd[i] = dsd_i
        Dnsd[i] = dnsd_i

    # Scenarios in which the sovereign with SOV_ID == 1 defaults
    sov_ind = np.nonzero(SOV_ID == 1)[0][0]
    sov_defaulted = X[sov_ind] < Dsd[sov_ind]

    # Default indicator, shape (N, m): Dnsd everywhere, Dsd in the
    # sovereign-default scenarios
    returns = X.T
    I_c = returns < Dnsd
    I_c[sov_defaulted] = returns[sov_defaulted] < Dsd

    Loss_c = np.sum(EAD * LGD * I_c, axis=1)

    UL_c = np.percentile(Loss_c, [99, 99.5, 99.9, 99.99])

    end = time.time()

    print(end-start)

    return UL_c
Beispiel #9
0
def simulation(time_para, delay,percent, period):
    """Monte Carlo loss quantiles without and with sovereign contagion.

    The portfolio is read from
    ``<path>/time_<time_para>_delay_<delay>_per_<percent>/period_<period>_portfolio.csv``
    below a hard-coded base path; the factor correlation matrix comes from a
    fixed ``Omega.csv``.  The random generator is seeded with 10, and the
    elapsed wall-clock time is printed.

    Parameters
    ----------
    time_para, delay, percent, period
        Only used (via ``str``) to build the portfolio file location.

    Returns
    -------
    UL : numpy.ndarray
        99, 99.5, 99.9 and 99.99 percentiles of the loss without contagion.
    UL_c : numpy.ndarray
        The same percentiles of the loss with contagion.
    """
    start = time.time()

    # Read Portfolio and correlation matrix
    path = r'C:\Users\Javier\Documents\MEGA\Thesis\CDS_data\factor_model'
    directory = 'time_' + str(time_para) + '_delay_' + str(delay) + '_per_' + str(percent)
    filename = 'period_' + str(period) + '_portfolio.csv'

    Portfolio = pd.read_csv(os.path.join(path, directory, filename), sep=',')
    # Read the correlation matrix from .csv
    Omega = pd.read_csv(r'C:\Users\Javier\Documents\MEGA\Thesis\CDS_data\factor_model\Ioannis\Omega.csv', index_col = 0,header = 0)

    m = len(Portfolio)    # number of counterparties in the portfolio
    N = 10**6             # number of simulations
    p = Omega.shape[0]    # number of systematic factors

    # Now we get the beta, gamma (PDGSD), PD, EAD and LGD
    Beta = Portfolio[[col for col in list(Portfolio) if col.startswith('beta')]].values
    gamma = Portfolio['gamma'].values
    PD = Portfolio['PD'].values
    EAD = Portfolio['EAD'].values
    LGD = Portfolio['LGD'].values
    SOV_ID = Portfolio['SOV_ID'].values
    SOV_LINK = Portfolio['SOV_LINK'].values

    # Calibrate default thresholds with PDs
    d = norm.ppf(PD)

    # perform a Cholesky factorisation to sample normal distributed
    # numbers with covariance matrix Omega
    chol = np.linalg.cholesky(Omega)

    np.random.seed(10)
    # generate independent normals and convert them to correlated factors
    Z = np.random.standard_normal((p, N))
    F = np.dot(chol, Z)

    # idiosyncratic loading s.t. the returns are standard normal; only the
    # diagonal is needed, so take it before the square root
    id_load = np.sqrt(1 - np.diagonal(np.dot(np.dot(Beta, Omega), Beta.T)))
    epsilon = np.random.standard_normal((N, m))

    # Put everything together to get the returns, shape (m, N)
    X = np.dot(Beta, F) + (id_load * epsilon).T

    # ---- loss with no contagion -------------------------------------------
    I = (((X.T - d) < 0))
    Loss = np.sum((EAD * LGD * I).T, axis=0)

    # ---- thresholds with contagion ----------------------------------------
    # Start from the PD-implied threshold and overwrite it for
    # non-sovereigns where the solved values are accepted.
    Dsd = d.copy()
    Dnsd = d.copy()
    for i in range(m):
        if SOV_ID[i] != 0:
            continue

        sov_ind = np.nonzero(SOV_ID == SOV_LINK[i])[0][0]
        PDs = PD[sov_ind]
        d_sov = norm.ppf(PDs)
        corr = np.dot(np.dot((Beta[i]).T, Omega), (Beta[sov_ind]))

        # fsolve hands over a length-1 array; pass its scalar entry on so
        # the bound lists given to mvndst stay flat.
        def Fsd(x):
            joint = mvn.mvndst([-100, -100], [x[0], d_sov], [0, 0], corr)[1]
            return joint / PDs - gamma[i]

        def Fnsd(x):
            joint = mvn.mvndst([-100, d_sov], [x[0], 100], [0, 1], corr)[1]
            return joint - PD[i] + gamma[i] * PDs

        dsd_i = fsolve(Fsd, norm.ppf(gamma[i]))[0]  # is there a better initial guess?
        dnsd_i = fsolve(Fnsd, d[i])[0]  # is there a better initial guess?

        # Keep the PD-implied threshold when the solved one is lower than it
        # or when the counterparty's PD is below the sovereign's.
        if dsd_i < d[i] or PD[i] < PDs:
            continue
        Dsd[i] = dsd_i
        Dnsd[i] = dnsd_i

    # Threshold per counterparty and scenario, shape (m, N); every scenario
    # starts with the "sovereign did not default" threshold.
    D = np.zeros((m, N))
    D[:] = Dnsd[:, np.newaxis]

    sov_rows = np.nonzero(SOV_ID != 0)[0]
    I_sov = (((X[sov_rows] - D[sov_rows]) < 0))

    # Switch a non-sovereign to Dsd in the scenarios where its linked
    # sovereign defaulted.  Row ``SOV_LINK[j] - 1`` of I_sov is used as the
    # sovereign's row, exactly as before.
    # NOTE(review): this assumes sovereign IDs are 1..k in portfolio order --
    # confirm against the portfolio files.
    for j in np.nonzero(SOV_ID == 0)[0]:
        sov_defaulted = I_sov[SOV_LINK[j] - 1]
        D[j, sov_defaulted] = Dsd[j]

    # construct default indicator
    I_c = ((X - D) < 0).T
    Loss_c = np.sum((EAD * LGD * I_c).T, axis=0)

    levels = [99, 99.5, 99.9, 99.99]
    UL = np.percentile(Loss, levels)
    UL_c = np.percentile(Loss_c, levels)

    end = time.time()

    print(end-start)

    return UL, UL_c
Beispiel #10
0
   # Fragment of a sampler step (Python 2); the enclosing function and loop
   # are not visible here.  It builds p_marg from p_T / p_T1, evaluates a
   # multivariate normal log-likelihood per year, and accepts or rejects the
   # proposal ``sample_prop``.
   p_T = norm.cdf(u1_t) + alpha_exp_fact*norm.cdf(-u2_t)
   p_T1 = norm.cdf(u1_t1) + alpha_exp_fact*norm.cdf(-u2_t1) 
   # increment p_T1 - p_T rescaled by 1 - p_T, then mapped to a standard
   # normal quantile
   p_marg = (p_T1 - p_T)/(1-p_T)
   p_marg_inv = norm.ppf(p_marg)
   # Find Covariance matrix
   #sys.exit(1)
   corr_vector = np.exp(-sq_dist_low_sq_m/sample_prop[2*N]**2)   # Input according to
   # (http://www.math.wsu.edu/faculty/genz/software/fort77/mvndstpack.f)
   # Interval
   # If there is rupture (e.g. X(j)=1), then z < inv_Phi(p), so infin = 0
   # (i.e. the interval [-inf, inv_Phi(p)]); otherwise infin = 1 and the
   # interval is [inv_Phi(p), inf].
   # presumably Rupture_history holds 0/1 flags -- TODO confirm
   infin = 1 - Rupture_history
   #log_lik
   log_lik_data = 0
   for j in range(n_years):
       # the same vector is passed as lower and upper bound; infin selects
       # which of the two is used in each dimension
       error,value,inform = mvn.mvndst(p_marg_inv[j,:],p_marg_inv[j,:],infin[j,:],corr_vector)
       log_lik_data += np.log(value)
 
   ####### Posterior_lik ######
   post_log_like = log_prior + log_lik_data
   # ratio of the proposed to the previously accepted posterior
   A = np.exp(post_log_like - prev_post_log_lik)
   #print 'Acceptance Rate: ', A
   U = np.random.random_sample()
   # accept the proposal when the uniform draw is below min(A, 1)
   if U < min(A,1):
       print "Sample: ", i, ' in process ', rank, ': Moved'
       parameter_samples[i,:] = sample_prop
       prev_post_log_lik = post_log_like
       Acc += 1
       parameter_likelihood[i] = post_log_like
   else:
       # NOTE(review): nothing is stored for sample i on rejection in the
       # visible lines -- confirm the surrounding code handles that.
       print "Sample: ", i, ' in process ', rank, ': Did not move'
Beispiel #11
0
def _mvstdnormcdf(lower, upper, corrcoef, **kwds):
    """
    standardized multivariate normal cumulative distribution function
    This is a wrapper for scipy.stats.kde.mvn.mvndst which calculates
    a rectangular integral over a standardized multivariate normal
    distribution.

    This function assumes standardized scale, that is the variance in each dimension
    is one, but correlation can be arbitrary, covariance = correlation matrix
    Parameters
    ----------
    lower, upper : array_like, 1d
       lower and upper integration limits with length equal to the number
       of dimensions of the multivariate normal distribution. It can contain
       -np.inf or np.inf for open integration intervals
    corrcoef : float or array_like
       specifies correlation matrix in one of three ways, see notes
    optional keyword parameters to influence integration
        * maxpts : int, maximum number of function values allowed. This
             parameter can be used to limit the time. A sensible
             strategy is to start with `maxpts` = 1000*N, and then
             increase `maxpts` if ERROR is too large.
        * abseps : float absolute error tolerance.
        * releps : float relative error tolerance.
    Returns
    -------
    cdfvalue : float
        value of the integral
    Notes
    -----
    The correlation matrix corrcoef can be given in 3 different ways
    If the multivariate normal is two-dimensional than only the
    correlation coefficient needs to be provided.
    For general dimension the correlation matrix can be provided either
    as a one-dimensional array of the upper triangular correlation
    coefficients stacked by rows, or as full square correlation matrix
    See Also
    --------
    mvnormcdf : cdf of multivariate normal distribution without
        standardization
    :return:
    """
    n = len(lower)
    lower = np.array(lower)
    upper = np.array(upper)
    correl = np.zeros(n * (n - 1) / 2)
    corrcoef = np.array(corrcoef)

    if (lower.ndim != 1) or (upper.ndim != 1):
        raise ValueError("Error: can handle only 1D bounds")
    if len(upper) != n:
        raise ValueError("Error: bounds have different lengths")
    if n == 2 and corrcoef.size == 1:
        correl = corrcoef
        # print 'case scalar rho', n
    elif corrcoef.ndim == 1 and len(corrcoef) == n * (n - 1) / 2.0:
        # print 'case flat corr', corrcoeff.shape
        correl = corrcoef
    elif corrcoef.shape == (n, n):
        # print 'case square corr',  correl.shape
        correl = corrcoef[np.tril_indices(n, -1)]
    else:
        raise ValueError("Error: corrcoef has incorrect dimension")

    if not 'maxpts' in kwds:
        if n > 2:
            kwds['maxpts'] = 10000 * n

    lowinf = np.isneginf(lower)
    uppinf = np.isposinf(upper)
    infin = 2.0 * np.ones(n)

    np.putmask(infin, lowinf, 0)  # infin.putmask(0,lowinf)
    np.putmask(infin, uppinf, 1)  # infin.putmask(1,uppinf)
    # this has to be last
    np.putmask(infin, lowinf * uppinf, -1)

    ##    #remove infs
    ##    np.putmask(lower,lowinf,-100)# infin.putmask(0,lowinf)
    ##    np.putmask(upper,uppinf,100) #infin.putmask(1,uppinf)

    error, cdfvalue, inform = mvn.mvndst(lower, upper, infin, correl, **kwds)
    if inform:
        print("Error Something wrong. {}: {}".format(INFORMCODE[inform],
                                                     error))
    return cdfvalue
Beispiel #12
0
def mvn_orthotope_density(mu, COV, lower=None, upper=None):
    """
    Estimate the probability within a hyperrectangle for an MVN distribution.

    Use the method of Alan Genz (1992) to estimate the integral of a
    multivariate normal density over an n-orthotope (i.e., hyperrectangle)
    defined by its lower and upper bounds. Limits can be relaxed in any
    direction by assigning infinite bounds (i.e. numpy.inf).

    The bounds are standardized with ``mu`` and the square roots of the
    diagonal of ``COV`` before being handed to ``mvndst``. The caller's
    ``lower`` / ``upper`` objects are not modified.

    Parameters
    ----------
    mu: float scalar or ndarray
        Mean(s) of the non-truncated distribution.
    COV: float ndarray
        Covariance matrix of the non-truncated distribution
    lower: float vector, optional, default: None
        Lower bound(s) for the truncated distributions. A scalar value can be
        used for a univariate case, while a list of bounds is expected in
        multivariate cases. If the distribution is non-truncated from below
        in a subset of the dimensions, use either `None` or assign an infinite
        value (i.e. -numpy.inf) to those dimensions.
    upper: float vector, optional, default: None
        Upper bound(s) for the truncated distributions. A scalar value can be
        used for a univariate case, while a list of bounds is expected in
        multivariate cases. If the distribution is non-truncated from above
        in a subset of the dimensions, use either `None` or assign an infinite
        value (i.e. numpy.inf) to those dimensions.

    Returns
    -------
    alpha: float
        Estimate of the probability within the hyperrectangle
    eps_alpha: float
        Estimate of the error in alpha.

    """

    # process the inputs and get the number of dimensions
    mu = np.atleast_1d(mu)
    COV = np.atleast_2d(COV)
    ndim = mu.size

    sig = np.sqrt(np.diag(COV))
    corr = COV / np.outer(sig, sig)

    # Build fresh float arrays for the bounds, mapping None to an infinite
    # limit. A new array is created so the caller's input stays untouched.
    if lower is None:
        lower = -np.inf * np.ones(ndim)
    else:
        lower = np.array(
            [-np.inf if bound is None else bound
             for bound in np.atleast_1d(lower)],
            dtype=np.float64)

    if upper is None:
        upper = np.inf * np.ones(ndim)
    else:
        upper = np.array(
            [np.inf if bound is None else bound
             for bound in np.atleast_1d(upper)],
            dtype=np.float64)

    # standardize the truncation limits
    lower = (lower - mu) / sig
    upper = (upper - mu) / sig

    # prepare the flags for infinite bounds (these are needed for the mvndst
    # function): 2 = both finite, 0 = no lower limit, 1 = no upper limit,
    # -1 = unbounded in both directions
    lowinf = np.isneginf(lower)
    uppinf = np.isposinf(upper)
    infin = 2.0 * np.ones(ndim)
    infin[lowinf] = 0
    infin[uppinf] = 1
    infin[lowinf & uppinf] = -1

    # prepare the correlation coefficients (strict lower triangle by rows)
    if ndim == 1:
        correl = 0
    else:
        correl = corr[np.tril_indices(ndim, -1)]

    # estimate the probability
    eps_alpha, alpha, __ = mvndst(lower, upper, infin, correl)

    return alpha, eps_alpha
Beispiel #13
0
def get_mvn_action_likelihood_marginal_mvndst(states, actions, means, covs):
    ''' Rewriting the original multivariate action likelihood to marginalize out inactive vars
        uses Alan Genz's multivariate normal Fortran function 'mvndst' in Scipy

    For every state/action pair and every subgoal the multivariate normal is
    integrated over a domain chosen by the sign of each action component:
    negative -> (-inf, state - mean], positive -> [state - mean, inf),
    zero -> the whole axis. Pairs whose action is all zeros are skipped and
    keep a likelihood of 0.

    Args:
        states (np.array): m-length state or n x m array of states
        actions (np.array): m-length action or n x m array of actions
        means (np.array): k x m array of means for k subgoals
        covs (np.array): k x m x m array of m covariance matrices for k subgoals

    Returns:
        action_likelihoods (np.array): n x k array of likelihoods for each subgoal for n states

    Raises:
        ValueError: if states and actions differ in shape.

    TODO:
        marginalize inactive variables by dropping covariances instead of computing whole domain
    '''

    if states.shape != actions.shape:
        raise ValueError('state and action args must have equal dimension.')

    elif states.ndim == 1:
        states = np.expand_dims(states, axis=0)
        actions = np.expand_dims(actions, axis=0)

    action_likelihoods = np.zeros((states.shape[0], means.shape[0]))

    # For state, action pair index i
    for i in range(states.shape[0]):
        # Find active axes and skip this pair if null input; later pairs
        # must still be evaluated, hence ``continue`` rather than ``break``
        active = np.where(actions[i] != 0)[0]
        if active.size == 0:
            continue

        # infin is the integer code used by func mvndst.f; it depends on the
        # action only, so build it once per pair:
        #   0 -> (-inf, upper], 1 -> [lower, inf), -1 -> (-inf, inf)
        infin = np.where(actions[i] < 0, 0,
                         np.where(actions[i] > 0, 1, -1))

        # Compute mvn pdf integration for each subgoal
        for g in range(means.shape[0]):
            # Bounds are shifted so that dist is zero mean; the same point
            # serves as lower and upper bound, infin picks which one applies
            shifted = states[i] - means[g]

            corr = pack_covs(covs[g])

            _, action_likelihoods[i, g], _ = mvn.mvndst(
                shifted, shifted, infin, corr)

    return action_likelihoods
Beispiel #14
0
def mvstdnormcdf(lower, upper, corrcoef, maxpts=None, **kwds):
    '''standardized multivariate normal cumulative distribution function

    This is a wrapper for scipy.stats.kde.mvn.mvndst which calculates
    a rectangular integral over a standardized multivariate normal
    distribution.

    This function assumes standardized scale, that is the variance in each dimension
    is one, but correlation can be arbitrary, covariance = correlation matrix

    Parameters
    ----------
    lower, upper : array_like, 1d
       lower and upper integration limits with length equal to the number
       of dimensions of the multivariate normal distribution. It can contain
       -np.inf or np.inf for open integration intervals
    corrcoef : float or array_like
       specifies correlation matrix in one of three ways, see notes
    maxpts : int, optional
        maximum number of function values allowed. This
        parameter can be used to limit the time. A sensible
        strategy is to start with `maxpts` = 1000*N, and then
        increase `maxpts` if ERROR is too large.
        Defaults to ``10000 * n``.
    **kwds
        further keyword parameters passed on to ``mvndst``:

        * abseps : float absolute error tolerance.
        * releps : float relative error tolerance.

    Returns
    -------
    cdfvalue : float
        value of the integral

    Raises
    ------
    ValueError
        If the bounds are not 1-D, differ in length, or ``corrcoef`` has
        none of the accepted shapes.
    MvnDstError
        If ``mvndst`` reports a non-zero status.

    Notes
    -----
    The correlation matrix corrcoef can be given in 3 different ways
    If the multivariate normal is two-dimensional then only the
    correlation coefficient needs to be provided.
    For general dimension the correlation matrix can be provided either
    as a one-dimensional array of the ``n * (n - 1) / 2`` off-diagonal
    coefficients, or as full square correlation matrix. A flat array is
    passed through unchanged; from a square matrix the strict lower
    triangle is taken row by row.

    See Also
    --------
    mvnormcdf : cdf of multivariate normal distribution without
        standardization

    Examples
    --------

    >>> print(mvstdnormcdf([-np.inf,-np.inf], [0.0,np.inf], 0.5))
    0.5
    >>> corr = [[1.0, 0, 0.5],[0,1,0],[0.5,0,1]]
    >>> assert Matrix(0.166666399198) == mvstdnormcdf(
    ...    [-np.inf,-np.inf,-100.0],
    ...    [0.0,0.0,0.0],
    ...    corr, abseps=2e-6
    ... )

    >>> assert Matrix(0.166666588293) == mvstdnormcdf(
    ...     [-np.inf,-np.inf,-100.0],
    ...     [    0.0,    0.0,   0.0],
    ...     corr, abseps=1e-8)                                                  #doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    MvnDstError: completion with ERROR > EPS and MAXPTS function values used;
                 increase MAXPTS to decrease ERROR, ERROR = 1.8253048422e-07

    >>> assert Matrix(0.166666588293) == mvstdnormcdf(
    ...    [-np.inf,-np.inf,-100.0],
    ...    [0.0,0.0,0.0],
    ...    corr,maxpts=1000000, abseps=1e-8
    ... )

    '''
    n = len(lower)
    # converting to arrays makes the ndim checks possible
    lower = np.array(lower)
    upper = np.array(upper)
    corrcoef = np.array(corrcoef)

    if (lower.ndim != 1) or (upper.ndim != 1):
        raise ValueError('can handle only 1D bounds')
    if len(upper) != n:
        raise ValueError('bounds have different lengths')

    # integer count of off-diagonal pairs (floor division: n*(n-1) is even)
    n_pairs = n * (n - 1) // 2
    if n == 2 and corrcoef.size == 1:
        # scalar rho
        correl = corrcoef
    elif corrcoef.ndim == 1 and len(corrcoef) == n_pairs:
        # already flat
        correl = corrcoef
    elif corrcoef.shape == (n, n):
        # square matrix: strict lower triangle, row by row
        correl = corrcoef[np.tri(n, n, -1, dtype=bool)]
    else:
        raise ValueError('corrcoef has incorrect dimension')

    if maxpts is None:
        maxpts = 10000 * n

    # mvndst limit codes: 2 = [lower, upper], 0 = (-inf, upper],
    # 1 = [lower, inf), -1 = (-inf, inf)
    lowinf = np.isneginf(lower)
    uppinf = np.isposinf(upper)
    infin = 2.0 * np.ones(n)

    infin[lowinf] = 0
    infin[uppinf] = 1
    infin[lowinf & uppinf] = -1

    error, cdfvalue, inform = mvndst(lower, upper, infin, correl, maxpts, **kwds)

    if inform:
        raise MvnDstError(inform, error)

    return cdfvalue