Exemple #1
0
def predict_probability_area(model, upper_bound, lower_bound):
    """
    Estimate the probability mass a GMM assigns to a rectangular bounding box.

    Each mixture component's box probability is obtained from
    ``ext.mvnormcdf`` (called with ``maxpts=2000``) and the results are
    combined using the component weights.

    Args:
        model (mixture.GMM): fitted model exposing ``weights_``, ``means_``
            and ``covars_``
        upper_bound (list): [upper lat, right lon] of bounding box
        lower_bound (list): [lower_lat, left_lon] of bounding box

    Returns:
        float: weighted sum of the per-component box probabilities
        (``0`` when the model has no components)
    """
    area_prob = 0
    for idx, weight in enumerate(model.weights_):
        component_prob = ext.mvnormcdf(upper_bound,
                                       model.means_[idx],
                                       model.covars_[idx],
                                       lower_bound,
                                       maxpts=2000)
        # The integrator occasionally yields NaN for a component (likely an
        # exceedingly small probability); such components contribute nothing.
        if math.isnan(component_prob):
            continue
        area_prob += component_prob * weight
    return area_prob
def N2_f(d1, d2, rho):
    """Evaluate a bivariate normal CDF with zero mean and unit variances.

    ``d1`` and ``d2`` are the upper integration bounds and ``rho`` is the
    off-diagonal entry of the covariance matrix. The integral itself is
    delegated to ``extras.mvnormcdf``.
    """
    zero_mean = 0.0
    unit_var = 1.0
    upper_limits = [d1, d2]  # upper bounds of the two variables
    # Unit variances on the diagonal, rho off the diagonal.
    cov_matrix = ([unit_var, rho], [rho, unit_var])
    return extras.mvnormcdf(upper_limits, zero_mean, cov_matrix)
def N2_f(d1, d2, rho):
    """Evaluate a bivariate normal CDF with zero mean and unit variances.

    ``d1`` and ``d2`` are the upper integration bounds and ``rho`` is the
    off-diagonal entry of the covariance matrix. The integral is computed
    by statsmodels' ``mvnormcdf``.
    """
    from statsmodels.sandbox.distributions.extras import mvnormcdf

    std_normal_mean = 0.0
    std_normal_var = 1.0
    bounds = [d1, d2]
    # Unit variances on the diagonal, rho off the diagonal.
    covariance = ([std_normal_var, rho], [rho, std_normal_var])
    return mvnormcdf(bounds, std_normal_mean, covariance)
Exemple #4
0
 def predict_probability_area(model, upper_bound, lower_bound):
     """
     Sum the weighted probability that each mixture component places inside
     the box spanned by ``lower_bound`` and ``upper_bound``.

     ``model`` must expose ``weights_``, ``means_`` and ``covars_``; each
     component is integrated with ``ext.mvnormcdf`` using ``maxpts=2000``.
     Returns ``0`` when the model has no components.
     """
     def weighted_terms():
         for k in range(len(model.weights_)):
             prob = ext.mvnormcdf(upper_bound, model.means_[k], model.covars_[k], lower_bound, maxpts=2000)
             # A NaN result shows up very rarely (likely an exceedingly low
             # probability); those components are left out of the sum.
             if not math.isnan(prob):
                 yield prob * model.weights_[k]

     return sum(weighted_terms())
def _compute_mvnorm_image_dim_2(upper_bound, out_res, mu, sigma):

    out_image = np.zeros([out_res] * 2)

    x_vals = np.linspace(0, upper_bound, out_res + 1)
    y_vals = np.linspace(0, upper_bound, out_res + 1)

    for i in range(out_res):
        for j in range(out_res):

            out_image[j, i] = mvnormcdf([x_vals[i + 1], y_vals[j + 1]],
                                        mu,
                                        sigma,
                                        lower=[x_vals[i], y_vals[j]])

    return out_image
Exemple #6
0
 def predict_probability_area(model, upper_bound, lower_bound):
     """
     Return the mixture-weighted probability of the box between
     ``lower_bound`` and ``upper_bound``.

     Every component of ``model`` (read from ``weights_``, ``means_`` and
     ``covars_``) is integrated over the box with ``ext.mvnormcdf`` using
     ``maxpts=2000``. Returns ``0`` for a model without components.
     """
     weights = model.weights_
     total_prob = 0
     for k in range(len(weights)):
         val = ext.mvnormcdf(upper_bound,
                             model.means_[k],
                             model.covars_[k],
                             lower_bound,
                             maxpts=2000)
         # Very rarely a component evaluates to NaN (likely an exceedingly
         # low probability); only non-NaN values are accumulated.
         if not math.isnan(val):
             total_prob += val * weights[k]
     return total_prob
Exemple #7
0
def _cross_moments_inner(ksi, eta, means, stds, rho, handle):
    """Evaluate the inner term of a cross-moment computation.

    ``means``, ``stds`` and ``rho`` are converted to a mean vector and a
    covariance matrix by ``simulation.param_converter``. The mean is shifted
    by ``cov.dot(handle)`` (``handle`` must have shape ``(2, 1)``), and the
    result combines two ``g_func`` terms with a bivariate normal CDF
    evaluated at ``log(ksi / X_0[0])`` and ``log(eta / X_0[1])``, scaled by
    an exponential factor built from the two quadratic forms in the inverse
    covariance.
    """
    mean, cov = simulation.param_converter(means, stds, rho)
    assert handle.shape == (2, 1)
    adj_mean = np.squeeze(mean + cov.dot(handle), axis=1)
    assert adj_mean.shape == (2, ), "{}".format(adj_mean.shape)

    # Difference of the quadratic forms of the shifted and unshifted means.
    cov_inv = inv(cov)
    shifted_form = ((adj_mean.T).dot(cov_inv)).dot(adj_mean)
    base_form = ((mean.T).dot(cov_inv)).dot(mean)
    mut = 0.5 * shifted_form - 0.5 * base_form

    ratio_x = ksi / X_0[0]
    ratio_y = eta / X_0[1]

    upper = np.zeros((2, ))
    upper[0] = np.log(ratio_x)
    upper[1] = np.log(ratio_y)

    term_x = g_func(ratio_x, mean=adj_mean[0], sigma2=stds[0]**2)
    term_y = g_func(ratio_y, mean=adj_mean[1], sigma2=stds[1]**2)
    joint = mvnormcdf(upper, adj_mean, cov)

    ret = - 1 + term_x + term_y + joint
    ret *= np.exp(mut)
    return ret
def N2_f(d1, d2, rho):
    """Cumulative bivariate standard normal distribution.

    Args:
        d1: upper bound for the first variable
        d2: upper bound for the second variable
        rho: correlation between the two variables

    Examples:
        print(N2_f(0, 0, 1.)) => 0.5
        print(N2_f(0, 0, 0))  => 0.25
    """
    import statsmodels.sandbox.distributions.extras as extras

    # Standard normal marginals: zero mean, unit variance.
    mean = 0.0
    variance = 1.0
    limits = [d1, d2]
    cov = ([variance, rho], [rho, variance])
    return extras.mvnormcdf(limits, mean, cov)
Exemple #9
0
def get_normal_probabilities(num_bins, mean, cov):
    """Return a grid of binned probabilities for a four-dimensional normal.

    Along every dimension the bin edges consist of ``num_bins - 1`` equally
    spaced points spanning +/- 1.96 standard deviations around zero (the
    edges do not depend on ``mean``), padded with -inf and +inf so that the
    ``num_bins`` bins cover the whole real line. The probability of each
    four-dimensional cell is computed with ``mvnormcdf``.

    Returns a tuple ``(q, grid)`` where ``q`` has shape ``(num_bins,) * 4``
    and is rescaled to sum to one, and ``grid`` holds the bin edges of the
    last dimension only.
    """
    # Only the four-dimensional case is supported at the moment.
    num_dims = 4

    q = np.full([num_bins] * num_dims, np.nan)

    edges_per_dim = []
    for dim in range(num_dims):
        scale = np.sqrt(cov[dim, dim])
        interior = np.linspace(-1.96 * scale,
                               1.96 * scale,
                               num_bins - 1,
                               endpoint=True)
        grid = np.concatenate(([-np.inf], interior, [np.inf]), axis=0)
        edges_per_dim.append(grid)

    # Visit the cells in row-major order; cell index c along a dimension is
    # bounded by edges c (lower) and c + 1 (upper) of that dimension.
    for cell in np.ndindex(*q.shape):
        upper = [edges_per_dim[dim][c + 1] for dim, c in enumerate(cell)]
        lower = [edges_per_dim[dim][c] for dim, c in enumerate(cell)]
        q[cell] = mvnormcdf(upper, mean, cov, lower)

    # Basic consistency checks on the integration results.
    np.testing.assert_equal(np.all(q >= 0), True)
    total = np.sum(q)
    np.testing.assert_equal(0.98 < total < 1.02, True)

    # Rescale so that the probabilities sum to exactly one.
    q = q / total

    return q, grid
 def marg_cdf(self, u):
     """
     Evaluate the normal CDF of the target columns for every row of ``u``.

     u is DataFrame n_obs x targets. It is transformed by
     ``self.make_input`` first. A single target uses the univariate standard
     normal CDF; several targets use ``mvnormcdf`` row by row with a zero
     mean and the ``targets`` sub-block of ``self.cr``. Returns a named
     ``pd.Series``.
     """
     targets = list(u.columns)
     x = self.make_input(u)
     if len(targets) > 1:
         # mvnormcdf evaluates one point per call, so loop over the rows.
         n_rows = x.shape[0]
         res = np.zeros(n_rows)
         zero_mean = [0] * len(targets)
         for row in range(n_rows):
             point = np.array(x.iloc[row, :])
             corr = np.array(self.cr.loc[targets, targets])
             res[row] = mvnormcdf(point, zero_mean, corr)
         res = pd.Series(res, index=range(u.shape[0]))
     else:
         # Standard univariate normal, indexed by the input values.
         res = ss.norm.cdf(x)
         res = pd.Series(res[:, 0], index=u.iloc[:, 0])
     res.name = 'Cond CDF of ' + ', '.join(targets)
     return res
Exemple #11
0
def predict_probability_area(model, upper_bound, lower_bound):
    """
    Compute the probability of the true location lying inside a bounding box under a GMM

    Args:
        model (mixture.GMM): GMM model providing ``weights_``, ``means_`` and ``covars_``
        upper_bound (list): [upper lat, right lon] of bounding box
        lower_bound (list): [lower_lat, left_lon] of bounding box

    Returns:
        prob_sum (float): weighted sum over components of the box probability
        returned by ``ext.mvnormcdf`` (``0`` for a model with no components)
    """
    prob_sum = 0
    for idx in range(len(model.weights_)):
        component_args = (upper_bound, model.means_[idx], model.covars_[idx], lower_bound)
        box_prob = ext.mvnormcdf(*component_args, maxpts=2000)
        # On very rare occasions a component evaluates to NaN (likely an
        # exceedingly low probability); such results are not accumulated.
        usable = not math.isnan(box_prob)
        if usable:
            prob_sum += box_prob * model.weights_[idx]
    return prob_sum
 def cond_cdf(self, u, u_cond):
     """
     Evaluate the conditional normal CDF of the targets for every row of ``u``.

     u is DataFrame n_obs x targets
     u_cond is DataFrame 1 x conditionals

     The conditional model is fitted through ``self.fit_cond`` and the
     inputs are transformed by ``self.make_input``. A single target uses the
     univariate normal CDF with the conditional mean and standard deviation;
     several targets use ``mvnormcdf`` row by row with ``self.cond_cov``.
     Returns a named ``pd.Series``.
     """
     first_col_values = u.values[:, 0]
     self.fit_cond(targets=u.columns, conditionals=u_cond.columns)
     x, _x_cond, mn = self.make_input(u, u_cond)
     if len(self.targets) > 1:
         # mvnormcdf evaluates one point per call, so loop over the rows.
         n_rows = x.shape[0]
         res = np.zeros(n_rows)
         for row in range(n_rows):
             point = np.array(x.iloc[row, :])
             cond_mean = np.array(mn)[0]
             cond_cov = np.array(self.cond_cov)
             res[row] = mvnormcdf(point, cond_mean, cond_cov)
         res = pd.Series(res, index=range(u.shape[0]))
     else:
         # Univariate normal with the conditional mean and standard deviation.
         cond_std = self.cond_cov.iloc[0, 0] ** 0.5
         res = ss.norm.cdf(x, mn.iloc[0], cond_std)
         res = pd.Series(res[:, 0], index=first_col_values)
     res.name = 'Cond CDF of ' + ', '.join(self.targets.astype('str'))
     return res
Exemple #13
0
def threshold_prob(YY,
                   index,
                   GG,
                   beta,
                   mu,
                   Vp,
                   h2,
                   FF,
                   TT,
                   maxpts_mult=20000,
                   log_out=True,
                   abseps=None,
                   releps=None,
                   genz=False):
    """Calculate the probability of binary phenotypes in a pedigree, conditional on index individuals.

    The joint probability of all phenotypes (P1) and the probability of the
    index individuals' phenotypes alone (P2) are both obtained by integrating
    a multivariate normal over half-open intervals bounded by ``TT``; the
    function returns their ratio.

    Keyword arguments:
    YY    -- Binary phenotype array, numpy array with 0 for below thresh, 1 for above
    index -- List of index patient indexes
    GG    -- Genotypes, numpy array of 0,1,2 giving the number of alleles
    beta  -- Effect size of the Mendelian locus
    mu    -- Mean population trait value
    Vp    -- Population trait variance
    h2    -- Trait heritability
    FF    -- Kinship matrix
    TT    -- Trait threshold for exhibiting the phenotype
    maxpts_mult -- Multiplied by the number of individuals to give ``maxpts``
                   for the joint integration
    log_out     -- If true return log(P1) - log(P2), otherwise P1 / P2
    abseps      -- Accepted for backward compatibility; no effect on the result
    releps      -- Relative tolerance forwarded to the joint integration when given
    genz        -- Accepted for backward compatibility; no effect on the result

    NOTE(review): ``genz=True`` used to run an additional ``mvstdnormcdf``
    integration (with ``abseps``) whose result was always overwritten by the
    ``mvnormcdf`` value below, so it only cost time. That discarded
    computation has been removed; the returned value is unchanged.
    """
    n_obs = np.size(YY)

    # Hash-based membership instead of a linear scan of `index` per element.
    index_set = set(index)
    in_index = [ii in index_set for ii, _ in enumerate(YY)]

    YY_index = np.array([1 * flag for flag in in_index])
    index_FF = subset_matrix(YY_index, FF, 1)
    GG_index = [GG_i for GG_i, flag in zip(GG, in_index) if flag]

    # Affected individuals (YY == 1) lie in [TT, inf), unaffected ones
    # (YY == 0) in (-inf, TT]. YY is expected to hold only 0/1 values.
    lower_lims = [TT if yy == 1 else -np.inf for yy in YY]
    upper_lims = [TT if yy == 0 else np.inf for yy in YY]

    means = np.ones(n_obs) * mu + np.array(GG) * beta
    cov = Vp * h2 * FF + Vp * (1 - h2) * np.identity(n_obs)

    # P1: joint probability of every individual's phenotype.
    integration_opts = {"maxpts": n_obs * maxpts_mult}
    if releps is not None:
        integration_opts["releps"] = releps
    P1 = mvnormcdf(lower=lower_lims,
                   upper=upper_lims,
                   mu=means,
                   cov=cov,
                   **integration_opts)

    # P2: probability of the index individuals' phenotypes alone.
    lower_lims_index = [xx for xx, flag in zip(lower_lims, in_index) if flag]
    upper_lims_index = [xx for xx, flag in zip(upper_lims, in_index) if flag]
    n_index = len(lower_lims_index)
    means_index = np.ones(n_index) * mu + np.array(GG_index) * beta
    cov_index = (Vp * h2 * index_FF + Vp *
                 (1 - h2) * np.identity(n_index))

    if n_index > 1:
        P2 = mvnormcdf(lower=lower_lims_index,
                       upper=upper_lims_index,
                       mu=np.array(means_index),
                       cov=cov_index)
    else:
        # A single index individual reduces to a univariate normal tail.
        index_sd = np.sqrt(cov_index[0, 0])
        if lower_lims_index[0] == -np.inf:
            P2 = norm.cdf(upper_lims_index[0],
                          loc=means_index[0],
                          scale=index_sd)
        else:
            P2 = 1 - norm.cdf(lower_lims_index[0],
                              loc=means_index[0],
                              scale=index_sd)

    if log_out:
        return np.log(P1) - np.log(P2)
    return P1 / P2
def Mfunc(V, H, F, tau, R=0.05, sigmaH=0.3, sigmaV=0.3, rho_VH=0.5):
    """Combine three bivariate standard normal CDF terms into one value.

    Computes ``t1 + t2 - t3`` where

    * ``t1 = H * Phi2(alpha1, alpha2)``,
    * ``t2 = V * Phi2(beta1, beta2)``,
    * ``t3 = F * exp(-R * tau) * Phi2(gamma1, gamma2)``,

    and ``Phi2`` is ``mvnormcdf`` with zero mean and unit variances. A term
    is set to ``0.0`` without calling ``mvnormcdf`` when either of its
    limits is ``-inf``. Non-positive ``V``, ``H`` or ``F`` are handled by
    replacing the corresponding log-ratio with ``-inf``, ``+inf`` or ``0``.

    Args:
        V, H, F: scalar levels entering the log-ratios (presumably two asset
            values and a strike/face value -- TODO confirm against caller).
        tau: scalar horizon that scales drift and volatility.
        R: rate used in the drift and in the ``exp(-R * tau)`` factor.
        sigmaH, sigmaV: volatilities associated with ``H`` and ``V``.
        rho_VH: correlation between the ``V`` and ``H`` processes.

    Returns:
        ``t1 + t2 - t3``. If any term is NaN a ``RuntimeWarning`` is issued
        and the (NaN) result is still returned.
    """
    import warnings

    def covmat(rho):
        return np.array([[1, rho], [rho, 1]])

    def log_ratio(num, den):
        # log(num / den), extended to non-positive arguments: -inf when only
        # num is non-positive, +inf when only den is, 0 when both are.
        if den > 0:
            return np.log(num / den) if num > 0 else -np.inf
        return np.inf if num > 0 else 0.0

    def term(scale, limits, rho):
        limits = np.array(limits).flatten()
        # A -inf upper limit means zero probability mass; skip the integrator.
        if any(limits == -np.inf):
            return 0.0
        return scale * mvnormcdf(limits, u0, covmat(rho))

    u0 = np.array([0, 0])
    sqrt_tau = np.sqrt(tau)
    sigma = np.sqrt(sigmaV**2 + sigmaH**2 - 2 * rho_VH * sigmaV * sigmaH)

    gamma1 = (log_ratio(H, F) +
              (R - .5 * sigmaH**2) * tau) / (sigmaH * sqrt_tau)
    gamma2 = (log_ratio(V, F) +
              (R - .5 * sigmaV**2) * tau) / (sigmaV * sqrt_tau)

    alpha1 = gamma1 + sigmaH * sqrt_tau
    alpha2 = (log_ratio(V, H) - 0.5 * sigma**2 * tau) / (sigma * sqrt_tau)

    beta1 = gamma2 + sigmaV * sqrt_tau
    # NOTE(review): by symmetry with alpha2 one would expect +inf here when
    # V <= 0 < H, but the original used 0 for every V <= 0 case; that
    # behaviour is preserved -- confirm which is intended.
    beta2_log = log_ratio(H, V) if V > 0 else 0.0
    beta2 = (beta2_log - 0.5 * sigma**2 * tau) / (sigma * sqrt_tau)

    t1 = term(H, [alpha1, alpha2], (rho_VH * sigmaV - sigmaH) / sigma)
    t2 = term(V, [beta1, beta2], (rho_VH * sigmaH - sigmaV) / sigma)
    t3 = term(F * np.exp(-R * tau), [gamma1, gamma2], rho_VH)

    if np.isnan(t1) or np.isnan(t2) or np.isnan(t3):
        # Report instead of dropping into an interactive debugger, which
        # would hang non-interactive runs.
        warnings.warn(
            "Mfunc produced a NaN term (t1=%r, t2=%r, t3=%r)" % (t1, t2, t3),
            RuntimeWarning)

    return t1 + t2 - t3