예제 #1
0
def test_pmf_accuracy():
    """Compare accuracy of the probability mass function.

    Compare the results with the accuracy check proposed in [Hong2013]_,
    equation (15): for three groups of identical probabilities, the Poisson
    Binomial pmf at ``k`` must agree with the convolution of the three
    corresponding Binomial pmfs.
    """
    [p1, p2, p3] = np.around(np.random.random_sample(size=3), decimals=2)
    # ``randint`` excludes its upper bound, so 11 keeps the group sizes in the
    # inclusive range 1..10 previously drawn with the deprecated
    # ``random_integers``.
    [n1, n2, n3] = np.random.randint(1, 11, size=3)
    nn = n1 + n2 + n3
    # Comprehensions are used on purpose: ``[p1] * n1`` with a NumPy integer
    # would multiply the values instead of repeating the list.
    l1 = [p1 for _ in range(n1)]
    l2 = [p2 for _ in range(n2)]
    l3 = [p3 for _ in range(n3)]
    p = l1 + l2 + l3
    b1 = binom(n=n1, p=p1)
    b2 = binom(n=n2, p=p2)
    b3 = binom(n=n3, p=p3)
    k = np.random.randint(0, nn + 1)
    # Convolution of the three binomials: i successes in group 1, j - i in
    # group 2 and k - j in group 3.
    chi_bn = 0
    for j in range(0, k + 1):
        for i in range(0, j + 1):
            chi_bn += b1.pmf(i) * b2.pmf(j - i) * b3.pmf(k - j)
    pb = PoiBin(p)
    chi_pb = pb.pmf(k)
    # Compare with an absolute tolerance; comparing rounded values can fail
    # for nearly identical numbers that straddle a rounding boundary.
    assert np.all(np.abs(chi_bn - chi_pb) < 1e-10)
예제 #2
0
def test_cdf():
    """Test the cumulative distribution function.

    With both success probabilities equal to 1 there are always exactly two
    successes, so the CDF must be 0 at 1 and 1 at 2.
    """
    p = [1, 1]
    pb = PoiBin(p)
    tol = 4 * np.finfo(float).eps
    # Compare absolute deviations: a signed difference would let any result
    # that is too small pass unnoticed.
    assert np.all(np.abs(pb.cdf([1, 2]) - np.array([0., 1.])) < tol)
    assert np.abs(pb.cdf(2) - 1.) < tol
예제 #3
0
def test_check_xi_are_real():
    """Check that ``check_xi_are_real`` tells real from complex ``xi``."""
    poibin = PoiBin([0])

    # Every imaginary part is zero, so the check must succeed.
    purely_real = np.array([1 + 0j, 1.8 + 0j], dtype=complex)
    assert poibin.check_xi_are_real(purely_real)

    # The first entry carries a non-zero imaginary part.
    with_imaginary = np.array([1 + 99j, 1.8 + 0j], dtype=complex)
    assert not poibin.check_xi_are_real(with_imaginary)
예제 #4
0
def test_check_xi_are_real():
    """Check that ``check_xi_are_real`` tells real from complex ``xi``."""
    poibin = PoiBin([0])

    # Every imaginary part is zero, so the check must succeed.
    purely_real = np.array([1 + 0j, 1.8 + 0j], dtype=complex)
    assert poibin.check_xi_are_real(purely_real)

    # The first entry carries a non-zero imaginary part.
    with_imaginary = np.array([1 + 99j, 1.8 + 0j], dtype=complex)
    assert not poibin.check_xi_are_real(with_imaginary)
예제 #5
0
def test_pmf_accuracy():
    """Compare accuracy of the probability mass function.

    Compare the results with the accuracy check proposed in [Hong2013]_,
    equation (15): for three groups of identical probabilities, the Poisson
    Binomial pmf at ``k`` must agree with the convolution of the three
    corresponding Binomial pmfs.
    """
    [p1, p2, p3] = np.around(np.random.random_sample(size=3), decimals=2)
    # ``randint`` excludes its upper bound, so 11 keeps the group sizes in the
    # inclusive range 1..10 previously drawn with the deprecated
    # ``random_integers``.
    [n1, n2, n3] = np.random.randint(1, 11, size=3)
    nn = n1 + n2 + n3
    # Comprehensions are used on purpose: ``[p1] * n1`` with a NumPy integer
    # would multiply the values instead of repeating the list.
    l1 = [p1 for _ in range(n1)]
    l2 = [p2 for _ in range(n2)]
    l3 = [p3 for _ in range(n3)]
    p = l1 + l2 + l3
    b1 = binom(n=n1, p=p1)
    b2 = binom(n=n2, p=p2)
    b3 = binom(n=n3, p=p3)
    k = np.random.randint(0, nn + 1)
    # Convolution of the three binomials: i successes in group 1, j - i in
    # group 2 and k - j in group 3.
    chi_bn = 0
    for j in range(0, k + 1):
        for i in range(0, j + 1):
            chi_bn += b1.pmf(i) * b2.pmf(j - i) * b3.pmf(k - j)
    pb = PoiBin(p)
    chi_pb = pb.pmf(k)
    # Compare with an absolute tolerance; comparing rounded values can fail
    # for nearly identical numbers that straddle a rounding boundary.
    assert np.all(np.abs(chi_bn - chi_pb) < 1e-10)
예제 #6
0
def test_pval():
    """Test the p-values function.

    With both success probabilities equal to 1 there are always exactly two
    successes, so the p-values for 1 and for 2 successes must both be 1.
    """
    p = [1, 1]
    pb = PoiBin(p)
    tol = 4 * np.finfo(float).eps

    # Compare absolute deviations: a signed difference would let any result
    # that is too small pass unnoticed.
    assert np.all(np.abs(pb.pval([1, 2]) - np.array([1., 1.])) < tol)
    assert np.abs(pb.pval(2) - 1.) < tol
예제 #7
0
def test_pval():
    """Test the p-values function.

    With both success probabilities equal to 1 there are always exactly two
    successes, so the p-values for 1 and for 2 successes must both be 1.
    """
    p = [1, 1]
    pb = PoiBin(p)
    tol = 4 * np.finfo(float).eps

    # Compare absolute deviations: a signed difference would let any result
    # that is too small pass unnoticed.
    assert np.all(np.abs(pb.pval([1, 2]) - np.array([1., 1.])) < tol)
    assert np.abs(pb.pval(2) - 1.) < tol
예제 #8
0
def test_pmf():
    """Check the probability mass function.

    The result must hold one entry per requested value, and selected values
    are compared with reference results from the poibin R package
    [Rpoibin]_.
    """
    assert PoiBin([1, 1]).pmf([1, 2]).size == 2

    # Each case: (probabilities, numbers of successes, reference pmf values
    # obtained with the R poibin package [Rpoibin]_).
    reference_cases = [
        (
            [0.4163448, 0.3340270, 0.9689613],
            [0, 1, 2, 3],
            [0.0120647, 0.39129134, 0.46189012, 0.13475384],
        ),
        (
            [0.9955901, 0.5696224, 0.8272597, 0.3818746, 0.4290036,
             0.8707646, 0.8858267, 0.7557183],
            [0, 2, 7, 8],
            [4.17079659e-07, 2.46250608e-03, 2.02460933e-01,
             4.48023378e-02],
        ),
    ]
    for probabilities, successes, reference in reference_cases:
        computed = PoiBin(probabilities).pmf(successes)
        assert np.all(np.abs(computed - np.array(reference)) < 1e-8)
예제 #9
0
def test_check_input_prob():
    """Check that invalid probability inputs raise ``ValueError``."""
    # Each invalid input is paired with the failure message that is reported
    # if ``PoiBin`` accepts it without raising.
    invalid_inputs = (
        ([[1, 1], [1, 2]],
         "Input must be an one-dimensional array or a list"),
        ([1, -1], "Input probabilities have to be non negative."),
        ([1, 2], "Input probabilities have to be smaller than 1."),
    )
    for probabilities, failure_message in invalid_inputs:
        with pytest.raises(ValueError):
            PoiBin(probabilities)
            pytest.fail(failure_message)
예제 #10
0
def win_oe_pval(win_probs, outcomes):
    """
    Given predicted Bernoulli win probabilities and actual outcomes, compute
    two-sided P values for the observed number of wins.

    Each P value is twice the smaller of the two tail probabilities
    ``P(X <= observed)`` and ``P(X >= observed)``, capped at 1. Both tails
    include the observed count itself, so an outcome that can actually occur
    never receives a P value of exactly 0.

    Args:
        win_probs (numpy.ndarray): 1D array of win probabilities of each
            match.
        outcomes (numpy.ndarray): 1D binary array of match outcomes.

    Returns:
        int: Total number of matches.
        int: Observed number of wins.
        float: Expected number of wins.
        float: Poisson binomial P value for the observed count.
        float: Poisson distribution P value for the observed count.
    """
    assert len(win_probs) == len(outcomes), \
        "win_probs and outcomes must have the same length"
    exp_wins = np.sum(win_probs)
    obs_wins = np.sum(outcomes)

    poibin = PoiBin(win_probs)
    poibin_lower = poibin.cdf(obs_wins)
    # P(X >= k) = 1 - P(X <= k - 1). For k == 0 the upper tail is the whole
    # distribution, which also avoids querying PoiBin with a negative count.
    if obs_wins > 0:
        poibin_upper = 1 - poibin.cdf(obs_wins - 1)
    else:
        poibin_upper = 1.0
    poibin_pval = min(1.0, 2 * min(poibin_lower, poibin_upper))

    # Same construction for the Poisson approximation with mean exp_wins.
    pois_lower = scipy.stats.poisson.cdf(obs_wins, exp_wins)
    pois_upper = 1 - scipy.stats.poisson.cdf(obs_wins - 1, exp_wins)
    pois_pval = min(1.0, 2 * min(pois_lower, pois_upper))
    return len(win_probs), obs_wins, exp_wins, poibin_pval, pois_pval
예제 #11
0
def test_pmf_pb_binom():
    """Compare the probability mass function with the binomial limit case."""
    # For equal probabilities p_j, the Poisson Binomial distribution reduces
    # to the Binomial one:
    p = [0.5, 0.5]
    pb = PoiBin(p)
    bn = binom(n=2, p=p[0])

    # Agree to four digits behind the comma. The absolute difference is used
    # because truncating each side with ``int`` is unstable: a value a hair
    # below a multiple of 1e-4 truncates to a different integer.
    assert abs(bn.pmf(0) - pb.pmf(0)) < 1e-4

    # For different probabilities p_j, the Poisson Binomial distribution and
    # the Binomial distribution are different:
    pb = PoiBin([0.5, 0.8])
    bn = binom(2, p=0.5)
    assert abs(bn.pmf(0) - pb.pmf(0)) > 1e-4
예제 #12
0
def test_pval_pb_binom():
    """Compare the p-values with the binomial limit case.

    Test that the p-values of the Poisson Binomial distribution are the same
    as the ones of the Binomial distribution when all the probabilities are
    equal.
    """
    pi = np.around(np.random.random_sample(), decimals=2)
    ni = np.random.randint(5, 500)
    pp = [pi for _ in range(ni)]
    bn = binom(n=ni, p=pi)
    k = np.random.randint(0, ni)
    # P(X >= k) for the binomial reference: 1 - P(X <= k) + P(X == k).
    pval_bn = 1 - bn.cdf(k) + bn.pmf(k)
    pb = PoiBin(pp)
    pval_pb = pb.pval(k)
    # Compare with an absolute tolerance; comparing rounded values can fail
    # for nearly identical numbers that straddle a rounding boundary.
    assert np.all(np.abs(pval_bn - pval_pb) < 1e-10)
예제 #13
0
def test_pval_pb_binom():
    """Compare the p-values with the binomial limit case.

    Test that the p-values of the Poisson Binomial distribution are the same
    as the ones of the Binomial distribution when all the probabilities are
    equal.
    """
    pi = np.around(np.random.random_sample(), decimals=2)
    ni = np.random.randint(5, 500)
    pp = [pi for _ in range(ni)]
    bn = binom(n=ni, p=pi)
    k = np.random.randint(0, ni)
    # P(X >= k) for the binomial reference: 1 - P(X <= k) + P(X == k).
    pval_bn = 1 - bn.cdf(k) + bn.pmf(k)
    pb = PoiBin(pp)
    pval_pb = pb.pval(k)
    # Compare with an absolute tolerance; comparing rounded values can fail
    # for nearly identical numbers that straddle a rounding boundary.
    assert np.all(np.abs(pval_bn - pval_pb) < 1e-10)
예제 #14
0
def test_get_pmf_xi():
    """Check ``get_pmf_xi`` against the expected pmf for small inputs."""
    # Each case: (input probabilities, expected pmf values).
    cases = (
        ([0.2, 0.5], [0.4, 0.5, 0.1]),
        ([0.3, 0.8], [0.14, 0.62, 0.24]),
        ([0.3, 0.8, 0.3], [0.098, 0.476, 0.354, 0.072]),
    )
    for probabilities, expected_pmf in cases:
        computed = PoiBin(probabilities).get_pmf_xi()
        deviation = np.abs(computed - np.array(expected_pmf))
        assert np.all(deviation < 1e-10)
예제 #15
0
def test_check_rv_input():
    """Check that ``check_rv_input`` only accepts non-negative integers."""
    checker = PoiBin([1, 1])

    # Valid inputs: a list of integers and a single integer.
    assert checker.check_rv_input([1, 2])
    assert checker.check_rv_input(2)

    # Each rejected value is paired with the failure message that is
    # reported if the check does not raise.
    rejected = (
        (-1, "Input value cannot be negative."),
        (1.7, "Input value must be an integer."),
    )
    for value, failure_message in rejected:
        with pytest.raises(AssertionError):
            checker.check_rv_input(value)
            pytest.fail(failure_message)
예제 #16
0
 def _run_binom_test(self, alternative="null"):
     """Return the tail probability for the count stored in ``_n_tests['p025']``.

     For ``alternative == "null"`` this is the survival function of a
     Binomial(n=``_n_tests['p05']``, p=.5) evaluated at that count minus one.
     For any other value it is the Poisson Binomial CDF at that count, built
     from three times the output of ``_compute_prop_lower_33`` restricted to
     the rows with ``p < .05``.
     """
     results = self._df_results
     family = results["family"].values
     df1, df2, p, ncp33 = results[["df1", "df2", "p", "ncp33"]].to_numpy().T
     k_below_25 = self._n_tests['p025']

     if alternative != "null":
         lower_33 = self._compute_prop_lower_33(
             .025, family, df1, df2, p, ncp33)
         prop_below_25_33 = 3 * lower_33
         # Keep only the entries whose p-value is below .05.
         significant_props = prop_below_25_33[p < .05]
         return PoiBin(significant_props).cdf(k_below_25)

     null_distribution = binom(n=self._n_tests['p05'], p=.5)
     return null_distribution.sf(k_below_25 - 1)
예제 #17
0
def test_pmf():
    """Check the probability mass function.

    The result must hold one entry per requested value, and selected values
    are compared with reference results from the poibin R package
    [Rpoibin]_.
    """
    assert PoiBin([1, 1]).pmf([1, 2]).size == 2

    # Each case: (probabilities, numbers of successes, reference pmf values
    # obtained with the R poibin package [Rpoibin]_).
    reference_cases = [
        (
            [0.4163448, 0.3340270, 0.9689613],
            [0, 1, 2, 3],
            [0.0120647, 0.39129134, 0.46189012, 0.13475384],
        ),
        (
            [0.9955901, 0.5696224, 0.8272597, 0.3818746, 0.4290036,
             0.8707646, 0.8858267, 0.7557183],
            [0, 2, 7, 8],
            [4.17079659e-07, 2.46250608e-03, 2.02460933e-01,
             4.48023378e-02],
        ),
    ]
    for probabilities, successes, reference in reference_cases:
        computed = PoiBin(probabilities).pmf(successes)
        assert np.all(np.abs(computed - np.array(reference)) < 1e-8)
예제 #18
0
def test_check_rv_input():
    """Test that ``check_rv_input`` only accepts non-negative integers."""
    p = [1, 1]
    pb = PoiBin(p)
    assert pb.check_rv_input([1, 2])
    assert pb.check_rv_input(2)

    # ``pytest.raises`` no longer accepts the ``message`` keyword (it was
    # deprecated and later removed), so passing it makes the test error out.
    # pytest already reports a failure when the expected exception is missing.

    # Input value cannot be negative.
    with pytest.raises(AssertionError):
        pb.check_rv_input(-1)
    # Input value must be an integer.
    with pytest.raises(AssertionError):
        pb.check_rv_input(1.7)
예제 #19
0
def test_cdf_accuracy():
    """Compare accuracy of the cumulative distribution function.

    Compare the results with the ones obtained with the R poibin package
    [Rpoibin]_.
    """
    # Each case: (probabilities, numbers of successes, expected CDF values).
    # The deviation must be computed with ``-``; using ``==`` inside
    # ``np.abs`` yields booleans and makes the assertion pass only when the
    # values are NOT equal.
    cases = (
        ([0.1, 0.1], [0, 2], [0.81, 1.]),
        ([0.5, 1.0], [1, 2], [0.5, 1.]),
        ([0.1, 0.5], [0, 1, 2], [0.45, 0.95, 1.]),
        ([0.1, 0.5, 0.7], [0, 1, 2], [0.135, 0.6, 0.965]),
    )
    for p, successes, expected in cases:
        pb = PoiBin(p)
        assert np.all(np.abs(pb.cdf(successes) - np.array(expected)) < 1e-10)
예제 #20
0
def test_get_pmf_xi():
    """Check ``get_pmf_xi`` against the expected pmf for small inputs."""
    # Each case: (input probabilities, expected pmf values).
    cases = (
        ([0.2, 0.5], [0.4, 0.5, 0.1]),
        ([0.3, 0.8], [0.14, 0.62, 0.24]),
        ([0.3, 0.8, 0.3], [0.098, 0.476, 0.354, 0.072]),
    )
    for probabilities, expected_pmf in cases:
        computed = PoiBin(probabilities).get_pmf_xi()
        deviation = np.abs(computed - np.array(expected_pmf))
        assert np.all(deviation < 1e-10)
예제 #21
0
def test_skew_pb_binom():
    """Compare the skew function with the binomial limit case."""
    # For equal probabilities p_j, the Poisson Binomial distribution reduces
    # to the Binomial one:
    p = [0.5, 0.5, 0.5, 0.5]
    pb = PoiBin(p)
    bn = binom(n=4, p=p[0])

    # Agree to four digits behind the comma. The absolute difference is used
    # because truncating each side with ``int`` is unstable: a value a hair
    # below a multiple of 1e-4 truncates to a different integer.
    assert abs(float(bn.stats(moments='s')) - pb.skew()) < 1e-4

    # For different probabilities p_j, the Poisson Binomial distribution and
    # the Binomial distribution are different:
    pb = PoiBin([0.5, 0.5, 0.8, 0.8])
    bn = binom(4, p=0.5)
    assert abs(float(bn.stats(moments='s')) - pb.skew()) > 1e-4
예제 #22
0
def test_pmf_pb_binom():
    """Compare the probability mass function with the binomial limit case."""
    # For equal probabilities p_j, the Poisson Binomial distribution reduces
    # to the Binomial one:
    p = [0.5, 0.5]
    pb = PoiBin(p)
    bn = binom(n=2, p=p[0])

    # Agree to four digits behind the comma. The absolute difference is used
    # because truncating each side with ``int`` is unstable: a value a hair
    # below a multiple of 1e-4 truncates to a different integer.
    assert abs(bn.pmf(0) - pb.pmf(0)) < 1e-4

    # For different probabilities p_j, the Poisson Binomial distribution and
    # the Binomial distribution are different:
    pb = PoiBin([0.5, 0.8])
    bn = binom(2, p=0.5)
    assert abs(bn.pmf(0) - pb.pmf(0)) > 1e-4
예제 #23
0
def test_argmax_pb_binom():
    """Compare ``argmax`` with the binomial limit case."""
    support = [0, 1, 2, 3, 4]
    # Both sides of each comparison are scaled by the same factor before
    # being converted to ``int``.
    scale = 10000

    # Equal probabilities: the Poisson Binomial distribution reduces to the
    # Binomial one, so both must peak at the same index.
    equal_probs = [0.5, 0.5, 0.5, 0.5]
    poibin = PoiBin(equal_probs)
    binomial = binom(n=4, p=equal_probs[0])
    binomial_peak = int(np.argmax(binomial.pmf(support)) * scale)
    assert binomial_peak == int(poibin.argmax() * scale)

    # Different probabilities: the two distributions differ, and so do the
    # positions of their maxima.
    poibin = PoiBin([0.5, 0.5, 0.8, 0.8])
    binomial = binom(4, p=0.5)
    binomial_peak = int(np.argmax(binomial.pmf(support)) * scale)
    assert binomial_peak != int(poibin.argmax() * scale)
예제 #24
0
def test_argmax():
    """Test amax function.

    For three probabilities of 0.1 and three of 0.9 the largest pmf value is
    expected to be 0.59122.
    """
    p = [0.1, 0.1, 0.1, 0.9, 0.9, 0.9]
    pb = PoiBin(p)
    tol = 4 * np.finfo(float).eps
    # Compare the absolute deviation: a signed difference would let any
    # result that is too small pass unnoticed.
    assert np.all(np.abs(pb.amax() - np.array([0.59122])) < tol)
예제 #25
0
def test_cdf_accuracy():
    """Compare accuracy of the cumulative distribution function.

    Compare the results with the ones obtained with the R poibin package
    [Rpoibin]_.
    """
    # Each case: (probabilities, numbers of successes, expected CDF values).
    # The deviation must be computed with ``-``; using ``==`` inside
    # ``np.abs`` yields booleans and makes the assertion pass only when the
    # values are NOT equal.
    cases = (
        ([0.1, 0.1], [0, 2], [0.81, 1.]),
        ([0.5, 1.0], [1, 2], [0.5, 1.]),
        ([0.1, 0.5], [0, 1, 2], [0.45, 0.95, 1.]),
        ([0.1, 0.5, 0.7], [0, 1, 2], [0.135, 0.6, 0.965]),
    )
    for p, successes, expected in cases:
        pb = PoiBin(p)
        assert np.all(np.abs(pb.cdf(successes) - np.array(expected)) < 1e-10)
예제 #26
0
def test_skew():
    """Test skew function against a precomputed reference value."""
    p = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
    pb = PoiBin(p)
    tol = 4 * np.finfo(float).eps
    # Compare the absolute deviation: a signed difference would let any
    # result that is too small pass unnoticed.
    assert np.all(np.abs(pb.skew() - np.array([0.1941243876059742])) < tol)
예제 #27
0
def test_get_cdf():
    """Check ``get_cdf`` on three ones, which must accumulate to 1, 2, 3."""
    poibin = PoiBin([1, 1])
    cumulative = poibin.get_cdf([1, 1, 1])
    expected = np.array([1., 2., 3.])
    assert np.all(cumulative == expected)
예제 #28
0
# uses output by ANGSD run with options "-doPost 2 -doGeno 11"
# output: contig, position, number non-missing samples, number "hard-called" heterozygotes,
#         expected num heterozygotes, probability of heterozygote majority
#         (last behaves slightly differently for odd/even numbers of samples)
# requires numpy, poibin, scipy
# put provided poibin.py into your PYTHONPATH location

from sys import stdin, stdout
from numpy import array, array_split, exp
# ``logsumexp`` is provided by ``scipy.special``; the former ``scipy.misc``
# alias is no longer available in current SciPy releases.
from scipy.special import logsumexp
from poibin import PoiBin

# Each input line is tab-separated: contig, position, two further columns,
# then four values per sample.
for line in stdin:
    line = line.strip().split("\t")
    # Floor division keeps the number of sections an integer on Python 3.
    chrom, pos, gl = line[0], line[1], array_split(array(line[4:], "float"),
                                                   (len(line) - 4) // 4)
    # Samples whose first value is negative are treated as missing.
    pr_heteroz = array([x[2] for x in gl if not x[0] < 0.])
    num_heteroz = sum([int(x[0]) for x in gl if x[0] == 1])
    n_samples = len(pr_heteroz)
    if n_samples:
        h_expected = sum(pr_heteroz) / n_samples
    else:
        # No usable samples: report NaN instead of dividing by zero.
        h_expected = 'NaN'
    try:
        pois_binom = PoiBin(pr_heteroz)
        # Probability that heterozygotes form a strict majority. The
        # threshold must be an integer, hence the floor division.
        utail_prob = pois_binom.pval(n_samples // 2 + 1)
    except Exception:
        # Best effort: any failure for this site is reported as NaN, while
        # KeyboardInterrupt and SystemExit still propagate.
        utail_prob = 'NaN'
    stdout.write("\t".join([
        chrom, pos,
        str(n_samples),
        str(num_heteroz),
        str(h_expected),
        str(utail_prob)
    ]) + "\n")
예제 #29
0
# fuzzy calculator of probability that heterozygotes constitute 
# the majority of calls for a given site
# (for filtering out lumped paralogs)
# by Nathaniel "Nate" S. Pope ([email protected])

# uses output by ANGSD run with options "-doPost 2 -doGeno 11"
# output: contig, position, number non-missing samples, number "hard-called" heterozygotes, 
#         expected num heterozygotes, probability of heterozygote majority 
#         (last behaves slightly differently for odd/even numbers of samples)
# requires numpy, poibin, scipy
# put provided poibin.py into your PYTHONPATH location

from sys import stdin, stdout
from numpy import array, array_split, exp
# ``logsumexp`` is provided by ``scipy.special``; the former ``scipy.misc``
# alias is no longer available in current SciPy releases.
from scipy.special import logsumexp
from poibin import PoiBin

# Each input line is tab-separated: contig, position, two further columns,
# then four values per sample.
for line in stdin:
  line = line.strip().split("\t")
  # Floor division keeps the number of sections an integer on Python 3.
  chrom, pos, gl = line[0], line[1], array_split(array(line[4:], "float"), (len(line)-4)//4)
  # Samples whose first value is negative are treated as missing.
  pr_heteroz = array([x[2] for x in gl if not x[0]<0.])
  num_heteroz = sum([int(x[0]) for x in gl if x[0]==1])
  n_samples = len(pr_heteroz)
  if n_samples:
    h_expected = sum(pr_heteroz)/n_samples
  else:
    # No usable samples: report NaN instead of dividing by zero.
    h_expected = 'NaN'
  try:
    pois_binom = PoiBin(pr_heteroz)
    # Probability that heterozygotes form a strict majority. The threshold
    # must be an integer, hence the floor division.
    utail_prob = pois_binom.pval(n_samples//2+1)
  except Exception:
    # Best effort: any failure for this site is reported as NaN, while
    # KeyboardInterrupt and SystemExit still propagate.
    utail_prob = 'NaN'
  stdout.write("\t".join([chrom, pos, str(n_samples), str(num_heteroz), str(h_expected), str(utail_prob)]) + "\n")
예제 #30
0
def test_get_cdf():
    """Check ``get_cdf`` on three ones, which must accumulate to 1, 2, 3."""
    poibin = PoiBin([1, 1])
    cumulative = poibin.get_cdf([1, 1, 1])
    expected = np.array([1., 2., 3.])
    assert np.all(cumulative == expected)
예제 #31
0
def test_mean():
    """Check ``mean`` for three probabilities of 0 and three of 1."""
    poibin = PoiBin([0, 0, 0, 1, 1, 1])
    expected_mean = np.array([3])
    assert (poibin.mean() == expected_mean)
예제 #32
0
def test_var():
    """Test var function.

    Every probability contributes p * (1 - p) = 0.09 to the variance, which
    gives 0.54 for the six trials.
    """
    p = [0.1, 0.1, 0.1, 0.9, 0.9, 0.9]
    pb = PoiBin(p)
    # 0.54 is not exactly representable in binary floating point, so compare
    # with a tolerance instead of exact equality.
    assert np.all(np.abs(pb.var() - np.array([0.54])) < 1e-10)
예제 #33
0
def test_cdf():
    """Test the cumulative distribution function.

    With both success probabilities equal to 1 there are always exactly two
    successes, so the CDF must be 0 at 1 and 1 at 2.
    """
    p = [1, 1]
    pb = PoiBin(p)
    tol = 4 * np.finfo(float).eps
    # Compare absolute deviations: a signed difference would let any result
    # that is too small pass unnoticed.
    assert np.all(np.abs(pb.cdf([1, 2]) - np.array([0., 1.])) < tol)
    assert np.abs(pb.cdf(2) - 1.) < tol
# Export the collected feature records to a per-report CSV file.
# NOTE(review): the loop variable ``i`` is unused and ``current_justice`` is
# not updated in the visible body, so each iteration appears to rebuild and
# rewrite the same file - confirm against the surrounding code.
for i in range(len(justices)):
    # Build a table from the raw records using the predefined column names.
    feature_master = pd.DataFrame.from_records(feature_info_master, columns = feature_columns_info_master)
    # The file name combines the report identifier and the current justice.
    feature_master.to_csv('OutcomeReport_{}_FeatureImportInfo{}.csv'.format(unique_report, current_justice), mode = 'w+')


# Missing probabilities are replaced by the sentinel value 2, which is then
# filtered out when the per-row lists are built.
master_probas = master_probas.fillna(2)
# Map every row label to the list of its non-sentinel values.
ps = dict.fromkeys(list(master_probas.index.values), 0)
for ind, row in master_probas.iterrows():
    lista = [row[c] for c in master_probas.columns if row[c] != 2]
    ps[ind] = lista
outcomes = {}
for k in ps.keys():
    pb = PoiBin(ps[k])
    if len(ps[k]) == 9:
        outcomes[k] = sum(pb.pmf([5, 6, 7, 8, 9]))
    elif len(ps[k]) == 8:
        outcomes[k] = sum(pb.pmf([5, 6, 7, 8]))
    elif len(ps[k]) == 7:
        outcomes[k] = sum(pb.pmf([4, 5, 6, 7]))
    elif len(ps[k]) == 6:
        outcomes[k] = sum(pb.pmf([4, 5, 6]))
    elif len(ps[k]) == 5:
        outcomes[k] = sum(pb.pmf([3, 4, 5]))
    elif len(ps[k]) == 4:
        outcomes[k] = sum(pb.pmf([3, 4]))
    elif len(ps[k]) == 3:
        outcomes[k] = sum(pb.pmf([2, 3]))
    elif len(ps[k]) == 2:
예제 #35
0
 def calculate_pov_exact(self):
     """Return one minus the Poisson Binomial CDF at ``floor(n / 2)``.

     ``theta_T`` is first replaced by the output of
     ``round_probabilities`` and then used as the probabilities of the
     distribution.
     """
     rounded = round_probabilities(self.theta_T)
     self.theta_T = rounded
     distribution = PoiBin(self.theta_T)
     half = math.floor(self.n/2)
     at_most_half = distribution.cdf(half)
     return 1 - at_most_half
예제 #36
0
def test_std():
    """Test std function.

    Every probability contributes p * (1 - p) = 0.09 to the variance, so the
    standard deviation of the six trials is sqrt(0.54).
    """
    p = [0.1, 0.1, 0.1, 0.9, 0.9, 0.9]
    pb = PoiBin(p)
    # Compare with a tolerance: the computed value need not be bit-identical
    # to ``np.sqrt(0.54)``.
    assert np.all(np.abs(pb.std() - np.sqrt(0.54)) < 1e-10)