Example #1
def binom_conf_interval(n,
                        x,
                        cl=0.975,
                        alternative="two-sided",
                        p=None,
                        **kwargs):
    """
    Compute a confidence interval for a binomial p, the probability of success in each trial.

    Parameters
    ----------
    n : int
        The number of Bernoulli trials.
    x : int
        The number of successes.
    cl : float in (0, 1)
        The desired confidence level.
    alternative : {"two-sided", "lower", "upper"}
        Indicates the alternative hypothesis.
    p : float in (0, 1)
        Starting point in search for confidence bounds for probability of success in each trial.
    kwargs : dict
        Key word arguments

    Returns
    -------
    tuple
        lower and upper confidence level with coverage (approximately)
        1-alpha.

    Notes
    -----
    xtol : float
        Tolerance
    rtol : float
        Tolerance
    maxiter : int
        Maximum number of iterations.
    """
    from scipy.optimize import brentq
    from scipy.stats import binom, hypergeom

    assert alternative in ("two-sided", "lower", "upper")

    if p is None:
        p = x / n
    ci_low = 0.0
    ci_upp = 1.0

    if alternative == 'two-sided':
        cl = 1 - (1 - cl) / 2

    if alternative != "upper" and x > 0:
        f = lambda q: cl - binom.cdf(x - 1, n, q)
        ci_low = brentq(f, 0.0, p, **kwargs)
    if alternative != "lower" and x < n:
        f = lambda q: binom.cdf(x, n, q) - (1 - cl)
        ci_upp = brentq(f, 1.0, p, **kwargs)

    return ci_low, ci_upp
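A minimal usage sketch for the function above, with illustrative numbers (assumes SciPy is installed):

lo, hi = binom_conf_interval(25, 7, cl=0.95)
print(lo, hi)  # approximate two-sided 95% confidence bounds for p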
def binom_test_v2(x, n=None, p=0.5, alternative='two-sided'):
    n = np.int_(n)
    if (p > 1.0) or (p < 0.0):
        raise ValueError("p must be in range [0,1]")

    if alternative not in ('two-sided', 'less', 'greater'):
        raise ValueError(
            "alternative not recognized should be 'two-sided', 'less' or 'greater'"
        )
    if alternative == 'less':
        pval = binom.cdf(x, n, p)
        return pval
    if alternative == 'greater':
        pval = binom.sf(x - 1, n, p)
        return pval
    d = binom.pmf(x, n, p)
    rerr = 1 + 1e-7
    a_fn = lambda x1: binom.pmf(x1, n, p)
    if x == p * n:
        pval = 1.
    elif x < p * n:
        y = n - binary_search(a_fn, d * rerr, np.ceil(p * n), n) + 1
        pval = (binom.cdf(x, n, p) + binom.sf(n - y, n, p))
    else:
        y = binary_search(a_fn, d * rerr, 0, np.floor(p * n) + 1, True) + 1
        pval = (binom.cdf(y - 1, n, p) + binom.sf(x - 1, n, p))
    return min(1.0, pval)
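The two-sided branch above needs a binary_search helper that is not shown; the one-sided paths only require numpy and scipy.stats.binom at module level. A hedged usage sketch under those assumptions:

import numpy as np
from scipy.stats import binom

print(binom_test_v2(18, n=25, p=0.5, alternative='greater'))  # equals binom.sf(17, 25, 0.5)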
Example #3
def Gupper(theta,y,n,j):
    y=np.asarray(y).copy()
    n=np.asarray(n).copy()
    
    if(j==len(y)-1): return (binom.cdf(k=y[j],n=n[j],p=theta))
    
    return (binom.cdf(k=y[j]-1,n=n[j],p=theta)+binom.pmf(k=y[j],n=n[j],p=theta)*Gupper(theta=theta,y=y,n=n,j=j+1))
Example #4
def quantile_interval(level, n, p):
    """
    Calculate quantiles for confidence interval endpoints.
    Example: quantile_interval(0.90, 500, 0.90) gives the lower and upper endpoints (indices)
    for the 90th percentile of a probability distribution based on a sample of size 500.

    :param level: (float) confidence interval level between 0 and 1
    :param n: (int) sample size
    :param p: (float) percentile of interest between 0 and 1
    """
    # TODO: need to check corner cases
    l = int(n * p)
    r = l
    dens = 0
    while dens < level:
        prob_l = binom.pmf(l - 1, n, p)
        prob_r = binom.pmf(r + 1, n, p)
        if prob_l > prob_r:
            l -= 1
            if l < 0:
                l = 0
                break
        else:
            r += 1
            if r >= n:
                r = n - 1
                break
        dens = binom.cdf(r, n, p) - binom.cdf(l, n, p)
    return l, r
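Running the docstring's own example as a quick check (assumes from scipy.stats import binom at module level, as the snippet expects):

from scipy.stats import binom

l, r = quantile_interval(0.90, 500, 0.90)
print(l, r)  # index bounds that widen from position 450 until ~90% probability is covered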
def CDF_error_analytic_bootstrap(n, target_quantile, quantile_quantile):
    target_count = int(target_quantile * float(n))

    # Start off with a binary search
    small_ind = 0
    big_ind = n - 1
    small_prob = 1 - binom.cdf(target_count, n, 0)
    big_prob = 1 - binom.cdf(target_count, n, float(big_ind) / float(n))

    while big_ind - small_ind > 4:
        mid_ind = (big_ind + small_ind) / 2
        mid_prob = 1 - binom.cdf(target_count, n, float(mid_ind) / float(n))
        if mid_prob > quantile_quantile:
            big_prob = mid_prob
            big_ind = mid_ind
        else:
            small_prob = mid_prob
            small_ind = mid_ind

    # Finish it off with a linear search
    prob_closest = -100
    for p_num in xrange(small_ind, big_ind + 1):
        p = float(p_num) / float(n)
        coCDF_prob = 1 - binom.cdf(target_count, n, p)
        if abs(coCDF_prob - quantile_quantile) < abs(prob_closest -
                                                     quantile_quantile):
            prob_closest = coCDF_prob
            prob_index = p_num

    return (prob_index)
def calc_probabilities(prevalence_per_100k=1,
                       days_of_no_transmission_threshold=28,
                       num_tests=4,
                       r0=1,
                       generation_interval=4.7,
                       high_prev_pop_rel_likelihood=1,
                       high_prev_testing_proportion=.1):

    prevalence_per_100k = simple_exponential_growth(
        initial_population=prevalence_per_100k,
        r_eff=r0,
        num_days=days_of_no_transmission_threshold,
        generation_interval=generation_interval)

    prevalence_per_100k = list(prevalence_per_100k)
    num_in_10k_list, num_in_90k_list = population_split(
        prevalence_per_100k, high_prev_pop_rel_likelihood)
    pr_detect_10k = 1 - np.prod([
        binom.cdf(0, high_prev_testing_proportion * num_tests * 1000,
                  current_prev / 10000) for current_prev in num_in_10k_list
    ])

    pr_detect_90k = 1 - np.prod([
        binom.cdf(0, (1 - high_prev_testing_proportion) * num_tests * 1000,
                  current_prev / 90000) for current_prev in num_in_90k_list
    ])

    pr_detect = 1 - (1 - pr_detect_10k) * (1 - pr_detect_90k)

    return pr_detect
Example #8
def expctd_cond_leq_m(m, n, p):
    if m <= int(n / 2):
        return sum(binom.pmf(np.arange(m+1),n,p)\
            /binom.cdf(m,n,p)*np.arange(m+1))
    else:
        return n*p/binom.cdf(m,n,p)-binom.sf(m,n,p)\
                /binom.cdf(m,n,p)*expctd_cond_gr_m(m,n,p)
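The helper above computes E[X | X <= m] for X ~ Binomial(n, p); the else branch rewrites it via E[X] = E[X | X <= m] P(X <= m) + E[X | X > m] P(X > m) using an expctd_cond_gr_m companion that is not shown. A brute-force check of the truncated mean with illustrative values (assumes numpy and scipy):

import numpy as np
from scipy.stats import binom

n, p, m = 20, 0.3, 8
ks = np.arange(m + 1)
print(np.sum(ks * binom.pmf(ks, n, p)) / binom.cdf(m, n, p))  # E[X | X <= m] by definition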
def LogLikelihoodNgramsMultiple(Bow_unique, Bow_Ngrams, Bow_ngrams, N):
    """
    Log Likelihood of having the last word of the N-gram considering the previous sequence,
    e.g., for the sequence [W1, W2, W3, W4], what is the probability of having W4 when there
    is a sequence of W1, W2 and W3.
    :param Bow_unique: Individual word frequency
    :param Bow_Ngrams: N-gram word frequency
    :param Bow_ngrams: N-gram - 1 word frequency
    :param N: total number of N-grams
    :return: probabilities
    """
    Prob = {}
    for N_gram in Bow_Ngrams:
        individual_words = N_gram.split(" ")
        count_w1 = Bow_ngrams[" ".join(word for word in individual_words[:-1])]
        count_w2 = Bow_unique[individual_words[-1]]
        count_w12 = Bow_Ngrams[N_gram]

        p = count_w2 / N
        p1 = count_w12 / count_w1
        p2 = (count_w2 - count_w12) / (N - count_w1)

        # Calculate individual binomial probabilities
        pbinom1 = binom.cdf(count_w12, count_w1, p)
        pbinom2 = binom.cdf(count_w2 - count_w12, N - count_w1, p)
        pbinom3 = binom.cdf(count_w12, count_w1, p1)
        pbinom4 = binom.cdf(count_w2 - count_w12, N - count_w1, p2)

        # Log likelihood
        LL_i = np.log((pbinom1 * pbinom2) / (pbinom3 * pbinom4))
        Prob[N_gram] = {"LL": LL_i, "p1": p1, "p2": p2, "p": p}

    return Prob
def logLikelihoodNgrams(BOW_unique, BOW_grams, N):
    """
    Calculates the log likelihood probabilities for each ngrams.
    :param BOW_unique: dictionnary with each word and the corresponding counts
    :param BOW_grams: dictionnary with each n-gram and the corresponding counts
    :param N: value of the total number of n-grams
    :return: dictionnary with probabilities for each n-gram
    """

    Prob = {}

    for n_gram in BOW_grams:
        count_w1 = BOW_unique[n_gram.split(" ")[0]]
        print(count_w1)
        count_w2 = BOW_unique[n_gram.split(" ")[1]]
        count_w12 = BOW_grams[n_gram]

        p = count_w2 / N
        p1 = count_w12 / count_w1
        p2 = (count_w2 - count_w12) / (N - count_w1)

        #Calculate individual binomial probabilities
        pbinom1 = binom.cdf(count_w12, count_w1, p)
        pbinom2 = binom.cdf(count_w2 - count_w12, N - count_w1, p)
        pbinom3 = binom.cdf(count_w12, count_w1, p1)
        pbinom4 = binom.cdf(count_w2 - count_w12, N - count_w1, p2)

        # Log likelihood
        LL_i = np.log((pbinom1 * pbinom2) / (pbinom3 * pbinom4))
        Prob[n_gram] = {"LL": LL_i, "p1": p1, "p2": p2, "p": p}

    return Prob
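A toy call, only to illustrate the expected dictionary shapes (hypothetical counts, not from the source; assumes numpy and scipy.stats.binom are imported at module level):

import numpy as np
from scipy.stats import binom

BOW_unique = {"new": 10, "york": 6}
BOW_grams = {"new york": 5}
print(logLikelihoodNgrams(BOW_unique, BOW_grams, N=100))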
Example #11
def test_binomial_p():
    """Binomial Test."""
    np.testing.assert_almost_equal(binomial_p(5, 10, 0.5, 'greater'),
                                   1 - binom.cdf(4, 10, 0.5))
    np.testing.assert_almost_equal(binomial_p(5, 10, 0.5, 'less'),
                                   binom.cdf(5, 10, 0.5))
    np.testing.assert_almost_equal(binomial_p(5, 10, 0.5, 'two-sided'), 1)
Example #12
 def naive_call(self, player_id):
     r = self.sim_rollout[player_id][0] + self.sim_rollout[player_id]
     r[0] = self.sim_rollout[player_id][0]
     N = sum(self.dice)
     other_dice = N - self.dice[player_id]
     if self.last_bid[0] > r[self.last_bid[1]] + other_dice:
         return [0]
     p_call_liar = binom.cdf(self.last_bid[0] - r[self.last_bid[1]] - 1,
                             other_dice,
                             1 / 6 + (self.last_bid[1] != 0) / 6)
     odds = np.zeros((1 + N, 6))
     lower_lim = get_legit_bids(self.last_bid)
     for i in range(6):
         p = 1 / 6 + (i != 0) / 6
         upper = int(binom.isf(0.15, other_dice, p)) + r[i]
         lower = lower_lim[i]
         odds[lower:upper + 1, i] = (
             1 - binom.cdf(np.arange(-r[i] - 1, -r[i] + N), other_dice, p) *
             binom.cdf(np.arange(-1, N), N, p))[lower:upper + 1]
     if p_call_liar > 0.7 or np.random.sample() < p_call_liar / (
             p_call_liar + np.sum(odds)):
         return [0]
     else:
         odds = (odds**3).flatten()
         odds /= np.sum(odds)
         #print('odd',odds)
         index = np.random.choice(np.arange(len(odds)), p=odds)
         return [index // 6, index % 6]
Example #13
def transition_prob_naive(result, rollout, pre_bid, bid, call_belief):
    r = rollout[0] + rollout
    r[0] = rollout[0]
    N = len(result) - 1  #num of total dice
    other_dice = N - sum(rollout)
    odds = np.zeros((1 + N, 6))
    if pre_bid is not None:
        lower_lim = get_legit_bids(pre_bid)
    for i in range(6):
        p = 1 / 6 + (i != 0) / 6
        upper = int(binom.isf(0.15, other_dice, p)) + r[i]
        if pre_bid is None:
            lower = max(1, int(binom.isf(0.85, other_dice, p)) + r[i])
        else:
            lower = max(1, lower_lim[i])
        odds[lower:upper + 1,
             i] = (1 -
                   binom.cdf(np.arange(-r[i] - 1, -r[i] + N), other_dice, p) *
                   binom.cdf(np.arange(-1, N), N, p))[lower:upper + 1]
    odds = odds**(3 + int(9 / other_dice**2))
    #    if sum(rollout)==1:
    #        print(rollout,pre_bid,bid)
    #        print(odds)
    if odds[bid[0], bid[1]] == 0:
        return 0
    return odds[bid[0], bid[1]] / np.sum(odds)
Example #14
def generate_mapping(lines, answers, combination):
    answers = np.array(answers)
    p = len(answers[answers]) / float(len(answers))
    #print "True questions fraction: {0}".format(p)

    letter_freq = su.calc_freq_over_cols(lines, combination)

    correlation_list = su.calc_correlations(lines, answers, combination)

    sorted_corr_indices = np.argsort(correlation_list)[::-1]
    cur_freq = 0.0
    added_to_positive = 0
    k = len(combination)
    halfK = k / 2.0

    cur_proba = 1.0 - binom.cdf(halfK, k, cur_freq)
    while cur_proba < p:
        cur_freq += letter_freq[su.alphabet[
            sorted_corr_indices[added_to_positive]]]
        added_to_positive += 1
        cur_proba = 1.0 - binom.cdf(halfK, k, cur_freq)

    mapping = {}
    for i in range(added_to_positive):
        mapping[su.alphabet[sorted_corr_indices[i]]] = 1.0
    for i in range(added_to_positive, len(su.alphabet)):
        mapping[su.alphabet[sorted_corr_indices[i]]] = 0.0
    return mapping
Example #15
 def Warranty(self, sales_volume, cost_warranty):
     self.sales_volume = sales_volume
     self.cost_warranty = cost_warranty
     print('Compute warranty cost for ' + self.name + ' RDT')
     failureprob = sum(
         binom.cdf(k=self.c, n=self.n, p=self.pi) * self.pi) / sum(
             binom.cdf(k=self.c, n=self.n, p=self.pi))
     return self.cost_warranty * failureprob * self.sales_volume, failureprob
    def _team_independent_start_pct(self, week):
        def projected_leaders(positions, league=self.league):
            return sorted(league.player_universe.values(),
                          key=lambda player: player.weekly_points(week)
                          if player.weekly_points(week) and player.position in
                          positions else 0.0,
                          reverse=True)

        week_injury_rate = self.league.injury_simulations[self.position][week -
                                                                         1]
        weekly_projected_leaders = projected_leaders([self.position])

        player_rank = weekly_projected_leaders.index(self)

        total_starting_players = (self.league.roster_settings[self.position] *
                                  self.league.roster_settings['teams'])
        non_flex_pct = 1 if player_rank < total_starting_players else (
            1 - binom.cdf(
                # Probability that few enough players ahead get hurt to prevent
                # player from starting
                player_rank - total_starting_players,
                player_rank,
                week_injury_rate))
        if (self.position not in self.league.roster_settings['flex_positions']
                or non_flex_pct == 1
                or self.league.roster_settings['flex'] == 0):
            self.independent_start_pcts[week] = non_flex_pct
            return non_flex_pct
        else:
            weekly_projected_leaders = projected_leaders(
                self.league.roster_settings['flex_positions'])

            player_rank = weekly_projected_leaders.index(self)

            total_flex_eligible_starters = (sum([
                self.league.roster_settings[pos]
                for pos in self.league.roster_settings['flex_positions']
            ]) * self.league.roster_settings['teams'])

            injury_rates = []
            injury_rate_weights = []
            for pos in self.league.roster_settings['flex_positions']:
                injury_rates.append(self.league.injury_simulations[pos][week -
                                                                        1])
                injury_rate_weights.append(self.league.roster_settings[pos])

            flex_pct = 1 if player_rank < total_flex_eligible_starters else (
                1 - binom.cdf(
                    # Probability that few enough players ahead get hurt to
                    # prevent player from starting
                    player_rank - total_flex_eligible_starters,
                    player_rank,
                    np.average(injury_rates, weights=injury_rate_weights)))

            start_pct = (1 - non_flex_pct) * flex_pct + non_flex_pct
            self.independent_start_pcts[week] = start_pct
            return start_pct
Example #17
def confidence_variation(times, quantile, confidence_interval):
    """
    Calculate the confidence interval
    :param times: The list of times for the calculation
    :param quantile: the quantile we are going to use
    :param confidence_interval: The confidence interval
    :return: confidence_variation, lower confidence, upper confidence
    """

    assert isinstance(times, list)
    assert 0 < quantile < 1, "Quantile value is " + str(
        quantile) + "which should be between 0 and 1"
    assert 0.5 < confidence_interval < 1, "Desired confidence interval should be between 0.5 and 1"

    sorted_times = sorted(remove_outliers(times))
    q = mquantiles(sorted_times, quantile)[0]
    n = len(sorted_times)

    # This should not happen, just for debugging purposes
    if not n:
        print(times)

    confidence = 0
    middle = round(quantile * (n + 1))
    ui = middle
    li = middle
    while confidence < confidence_interval:
        if ui < n - 1:
            ui = ui + 1
        if li > 0:
            li = li - 1
        confidence = binom.cdf(ui - 1, n, quantile) - binom.cdf(
            li - 1, n, quantile)

        if ui >= n - 1 and li <= 0:
            break

    if ui >= n - 1:
        ui = n - 1
    if li <= 0 or li > ui:
        li = 0

    try:
        lower_range = sorted_times[li]
        upper_range = sorted_times[ui]
    except IndexError:
        # This should not happen. Just for debugging purposes
        print("Lower range", li)
        print("Upper range", ui)
        print("List length", len(sorted_times))
        sys.exit(1)
        pass

    confidence_range = upper_range - lower_range

    return (confidence_range / q) * 100, lower_range, upper_range
Example #18
    def get_batch_artefacts(self, gp):
        # using binom, and gnomad_af as p, to produce probability to help identify batch specific artefacts
        # lower_bound is there to remove cohorts where there is just one patient
        # zero_gnomad_c_cutoff allows max internal count when gnomad_af is 0
        dt_d = defaultdict(Counter)
        dt_r = defaultdict(Counter)
        cohorts = Counter()
        for k, v in gp['patients'].items():
            cohorts[self.patient_mini[k]['contact']] += 1
            vc = Counter(v['variants'])
            for i in vc:
                dt_d[self.patient_mini[k]['contact']][i] += 1
                if vc[i] > 1:
                    dt_r[self.patient_mini[k]['contact']][i] += 1
        # remove cohorts with count lower than lower_bound
        for k in list(cohorts.keys()):  # copy keys so entries can be deleted while iterating
            if cohorts[k] < self.lower_bound:
                del cohorts[k]
                del dt_d[k]
                if k in dt_r:
                    del dt_r[k]

        # for heterozygous variants
        result_d = defaultdict(list)
        for k1, v1 in dt_d.items():
            n_variants = len(v1)
            for k2, v2 in v1.items():
                if not gp['variants'][k2]['gnomad_af']:
                    if v2 > self.zero_gnomad_c_cutoff:
                        result_d[k1].append(k2)
                    continue
                prob = 1 - binom.cdf(v2 - 1, cohorts[k1],
                                     gp['variants'][k2]['gnomad_af'])
                if prob < self.binom_cutoff / n_variants:
                    #print(k2,prob)
                    result_d[k1].append(k2)
        for k in result_d:
            result_d[k] = set(result_d[k])

        # for homozygous variants
        result_r = defaultdict(list)
        for k1, v1 in dt_r.items():
            n_variants = len(v1)
            for k2, v2 in v1.items():
                if not gp['variants'][k2]['gnomad_hom_af']:
                    if v2 > self.zero_gnomad_c_cutoff:
                        result_r[k1].append(k2)
                    continue
                prob = 1 - binom.cdf(v2 - 1, cohorts[k1],
                                     gp['variants'][k2]['gnomad_hom_af'])
                if prob < self.binom_cutoff / n_variants:
                    #print(k2,prob)
                    result_r[k1].append(k2)
        for k in result_r:
            result_r[k] = set(result_r[k])
        return {'d': result_d, 'r': result_r}
def CRR_formula_put(S0, K, T, u, d, r):
    """
    CRR_formula_put(S0, K, T, u, d, r) = initial price of a put option using CRR formula

    CRR formula = (K/R^T)*phi(A;T,q) - S_0*phi(A;T,q_dash)

    where: 
    phi(_;_,_) is a binomial distribution function
    A = minimum number of upmoves for the option to be in the money
    q_dash = q*(U/D)
    q = risk neutral probability
    S0 = initial asset price
    K = strike price
    T = expiry time
    u = up
    d = down
    r = fixed interest rate

    """

    # import modules
    import math
    import numpy as np
    from scipy.stats import binom

    # check input parameters
    if S0 <= 0.0 or d <= -1 or d >= u or r <= -1:
        print("Invalid input arguments")
        print("Terminating program")
        return (1)

    # check for arbitrage
    if not (d < r < u):
        print("Model contains arbitrage")
        print("Terminating program")
        return (1)

    # crr variables
    U = 1 + u
    D = 1 + d
    R = 1 + r

    # calculating terms: q, q_dash, A
    q = (R - D) / (U - D)
    q_dash = ((R - D) / (U - D)) * (U / R)
    A = math.ceil((np.log(K / (S0 * D**T)) / np.log(U / D))) - 1

    # price of call option
    price = (K / (R**T)) * binom.cdf(A, T, q) - \
        S0 * binom.cdf(A, T, q_dash)

    return (price)
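An illustrative call with hypothetical parameters (three periods, 10% up moves, 5% down moves, 2% interest), just to show the expected inputs:

print(CRR_formula_put(S0=100.0, K=100.0, T=3, u=0.10, d=-0.05, r=0.02))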
Example #20
def test_row(row):
    af1 = row['af1']
    depth1 = row['depth1']
    depth2 = row['depth2']
    p = af1
    if p == 1.0:
        p = 0.99
    pval1 = binom.cdf(0, depth1, p)
    pval2 = binom.cdf(0, depth2, p)
    return pval1 < 0.05 and pval2 < 0.05
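A hypothetical row, only to show the keys the snippet indexes (assumes from scipy.stats import binom at module level):

from scipy.stats import binom

row = {'af1': 0.4, 'depth1': 30, 'depth2': 25}
print(test_row(row))  # True: observing zero successes at these depths is unlikely in both samples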
Example #21
def hypergeom_conf_interval(n, x, N, cl=0.975, alternative="two-sided", G=None,
                        **kwargs):
    """
    Compute a confidence interval for a hypergeometric G, the number of good objects in the population.

    Parameters
    ----------
    n : int
        The number of Bernoulli trials.
    x : int
        The number of "good" objects in the sample.
    N : int
        The number of objects in the population
    cl : float in (0, 1)
        The desired confidence level.
    alternative : {"two-sided", "lower", "upper"}
        Indicates the alternative hypothesis.
    G : int in [0, N]
        Starting point in search for confidence bounds for hypergeometric G.
    kwargs : dict
        Key word arguments

    Returns
    -------
    tuple
        lower and upper confidence level with coverage (at least)
        1-alpha.

    Notes
    -----
    xtol : float
        Tolerance
    rtol : float
        Tolerance
    maxiter : int
        Maximum number of iterations.
    """
    assert alternative in ("two-sided", "lower", "upper")

    if G is None:
        G = (x / n)*N
    ci_low = 0.0
    ci_upp = N

    if alternative == 'two-sided':
        cl = 1 - (1-cl)/2

    if alternative != "upper" and x > 0:
        f = lambda q: cl - binom.cdf(x - 1, n, q)
        ci_low = brentq(f, 0.0, p, *kwargs)
    if alternative != "lower" and x < n:
        f = lambda q: binom.cdf(x, n, q) - (1 - cl)
        ci_upp = brentq(f, 1.0, p, *kwargs)

    return ci_low, ci_upp
Example #22
def CRR_formula_strap(S0, K, T, u, d, r):
    """
    
    CRR_formula_strap(S0, K, T, u, d, r) = initial price of a strap option using CRR formula

    CRR formula = 2 * pi_c + pi_p

    where: 
    pi_c = price of a call option 
    pi_p = price of a put option 
    q = risk neutral probability
    S0 = initial asset price
    K = strike price
    T = expiry time
    u = up
    d = down
    r = fixed interest rate

    """

    # check input parameters
    if S0 <= 0.0 or d <= -1 or d >= u or r <= -1:
        print("Invalid input arguments")
        print("Terminating program")
        return (1)

    # check for arbitrage
    if not (d < r < u):
        print("Model contains arbitrage")
        print("Terminating program")
        return (1)

    # import modules
    import math
    import numpy as np
    from scipy.stats import binom

    # crr variables
    U = 1 + u
    D = 1 + d
    R = 1 + r

    # calculating terms: q, q_dash, A
    q = (R - D) / (U - D)
    q_dash = ((R - D) / (U - D)) * (U / R)
    A = math.floor((np.log(K / (S0 * D**T)) / np.log(U / D)))

    # price of put and call of call option
    pi_p = (K / (R**T)) * binom.cdf(A, T, q) - \
        S0 * binom.cdf(A, T, q_dash)
    pi_c = S0 * (1 - binom.cdf(A, T, q_dash)) - (K /
                                                 (R**T)) * (1 -
                                                            binom.cdf(A, T, q))

    return (pi_p + 2 * pi_c)
Example #23
def updateFOS(path, db, motifChrom="chr17", dgfCutoff=20, method="Binom"):#, flankWin=35):
    """calculate fos from discontinuous variableStep wiggle files
	with two method options:
		NSD/Binomial test"""
    mcollection = db["hg19"+motifChrom]
    print 'updating fos', motifChrom
    for infile in glob.glob(os.path.join(path,"wgEncodeUwDgf*_cut"+motifChrom)):
	#(wigpath,wigfile) = os.path.split(infile)
	#(wigfilename,ext) = os.path.splitext(infile)
	wigfilename = infile.split(motifChrom)[0]
	expName = "fos"
	ctName = wigfilename.split('EncodeUwDgf')[-1].split('Aln')[0]
	wigFile = open(infile,'rt')
	#wig = csv.reader(wigFile,delimiter='\t')
	#bwFile = os.path.join(path,wigfilename+'.bw')
	#countWig.compressVarWig(wigFile, expName, wigfilename)
	bwFile = wigfilename+motifChrom+'.bw'
	if not os.path.isfile(bwFile):
	    countWig.compressVarWig(wigFile, expName, wigfilename)
	coordDict, valuesDict = countWig.getBinVarCoord(bwFile,ctName)
	arrayDict = defaultdict(list)
	cursor = mcollection.find({"tf_name":{"$in": ["IRF3","MAFK","NFYA","SIN3A","ZNF384"]}})
		#{"tf_name": tfName,
#		"motif_score":{"$lt":1e-4},
#		"motif_genomic_regions_info.chr": motifChrom})
	for test in cursor:
	    if not motifChrom in arrayDict:
		arrayDict[motifChrom] = countWig.buildVarHist(motifChrom,coordDict,valuesDict,ctName)
	    xs, xvals, sums = arrayDict[motifChrom]
	    motifStart = test["genomic_region"]["start"]
	    motifEnd = test["genomic_region"]["end"]
	    flankWin = round((motifEnd - motifStart + 1)*1.75)
	    flankL = max(0, motifStart - flankWin)
	    flankR = motifEnd + flankWin
	    countTotL = countWig.queryHist(xs, xvals, sums, flankL, motifEnd, varWindow=True)[2]
	    countTotR = countWig.queryHist(xs, xvals, sums, motifStart, flankR, varWindow=True)[2]
	    countCent = countWig.queryHist(xs, xvals, sums, motifStart, motifEnd)[2]
	    count = countWig.queryHist(xs, xvals, sums, motifStart-100, motifEnd+100, varWindow=True)[2]
	    if method == "NSD":
		try:
		    fos = np.sqrt((count-countCent)/count)-np.sqrt(countCent/count)
		except ZeroDivisionError:
		    fos = 0 
	    elif method == "Binom":
		try:
		    fos = min(1 - binom.cdf(countCent,countTotL,float(motifEnd-motifStart)/(motifEnd-flankL)), 
			1 - binom.cdf(countCent,countTotR,float(motifEnd-motifStart)/(flankR-motifStart)))
		except ZeroDivisionError:
		    fos = 0

	    if fos > 0.95 and count-countCent > dgfCutoff:#(flankR-flankL-(motifEnd-motifStart)):
		mcollection.update({"_id":test["_id"]},{"$set":{"dgf.fos": fos}}, upsert = True)
	    	#print motifChrom+'\t'+str(motifStart)+'\t'+str(motifEnd)+'\t'+str(fos)
    return 0 	
Example #24
 def calculate_p_tie(row, n=12):
     pa = row[6]
     pb = row[7]
     a_wins = sum(
         binom.pmf(xa + 1, n, pa) * binom.cdf(xa, n, pb)
         for xa in range(n)
     )
     b_wins = sum(
         binom.pmf(xb + 1, n, pb) * binom.cdf(xb, n, pa)
         for xb in range(n)
     )
     return 1/(1 - a_wins - b_wins)
def BinModEUPut(S0, r, sigma, T, K, M):
    dt = T / M
    b = 0.5 * (np.exp(-r * dt) + np.exp((r + sigma**2) * dt))
    u = b + np.sqrt(b**2 - 1)
    d = 1 / u
    q = (np.exp(r * dt) - d) / (u - d)
    a = np.ceil((np.log(K / S0) - M * np.log(d)) / (np.log(u / d)))
    qtilde = q * u / np.exp(r * dt)

    V0 = K * np.exp(-r * T) * binom.cdf(a - 1, M, q) - S0 * binom.cdf(
        a - 1, M, qtilde)
    return V0
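An illustrative call with hypothetical parameters (assumes numpy as np and scipy.stats.binom are imported at module level, as the snippet expects):

import numpy as np
from scipy.stats import binom

print(BinModEUPut(S0=100.0, r=0.05, sigma=0.2, T=1.0, K=95.0, M=500))  # European put value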
Example #26
def binom_conf_interval(n, x, cl=0.975, alternative="two-sided", p=None,
                        **kwargs):
    """
    Compute a confidence interval for a binomial p, the probability of success in each trial.

    Parameters
    ----------
    n : int
        The number of Bernoulli trials.
    x : int
        The number of successes.
    cl : float in (0, 1)
        The desired confidence level.
    alternative : {"two-sided", "lower", "upper"}
        Indicates the alternative hypothesis.
    p : float in (0, 1)
        Starting point in search for confidence bounds for probability of success in each trial.
    kwargs : dict
        Key word arguments

    Returns
    -------
    tuple
        lower and upper confidence level with coverage (approximately)
        1-alpha.

    Notes
    -----
    xtol : float
        Tolerance
    rtol : float
        Tolerance
    maxiter : int
        Maximum number of iterations.
    """
    assert alternative in ("two-sided", "lower", "upper")

    if p is None:
        p = x / n
    ci_low = 0.0
    ci_upp = 1.0

    if alternative == 'two-sided':
        cl = 1 - (1-cl)/2

    if alternative != "upper" and x > 0:
        f = lambda q: cl - binom.cdf(x - 1, n, q)
        ci_low = brentq(f, 0.0, p, **kwargs)
    if alternative != "lower" and x < n:
        f = lambda q: binom.cdf(x, n, q) - (1 - cl)
        ci_upp = brentq(f, 1.0, p, **kwargs)

    return ci_low, ci_upp
Example #27
def gammatest(comp, data, weights, num):
    # set up vector for quicker search
    # (select first sample)
    # a large number of times
    #   select random sample
    #   calculate normalized distance from last sample
    # sort distances
    # express expected gamma parameters
    # find the distance at which deviation from expected distr is largest
    # compute its significane level
    data = data[comp.feet]
    dim = len(comp.feet)
    acc = np.cumsum(weights)
    dists = [False]*num
    oldind = selectind(acc)
    ind = oldind
    wsum = 0
    for i in range(num):
        while ind == oldind:
            ind = selectind(acc)
        dist = sqvec(np.matmul(comp.ilower, np.subtract(data.iloc[ind], data.iloc[oldind])))
        w = weights[oldind] # Because weights[ind] is already accounted for in selection
        dists[i] = (dist, w)
        wsum += w
        oldind = ind
    dists.sort(key = lambda pr:pr[0])
    mnval = 0
    mnind = 0
    w = 0
    for i in range(num):
        (d, w0) = dists[i]
        w += w0
        p = gammainc(dim/2, d/4)
        # print((wsum, w, p))
        val = log(wsum*p/w)*w + log(wsum*(1-p)/(wsum-w))*(wsum-w) if p > 0 and p < 1 and w > 0 and w < wsum else 0
        dists[i] = (d, w, p, p*wsum - w, val) 
        if val<mnval and p<0.90 and p>0.05 and p*wsum > w:
            mnval = val
            mnind = i
    (d, w,z1,z2,z3) = dists[mnind]
    p = gammainc(dim/2, d/4)
    if mnval == 0:
        sig = 1.0
    elif w > wsum*p:
        n = ceil(w)
        nn = ceil(wsum)
        sig = binom.cdf((nn-n), nn, (1-p))
    else:
        n = floor(w)
        nn = ceil(wsum)
        sig = binom.cdf(n, nn, p)
    return (sig, dists)
Example #28
def evaluate_policy(new_policy, env, sample):
    env.time_step(new_policy)

    X_I, X_S = sample.X_I, sample.X_S
    currentV_I = sample.val_I
    currentV_L = sample.val_L

    meanX_S, meanX_I, meanX_R = env.sample_stochastic()
    errX_S, errX_I, errX_R = env.get_error()

    val_I = meanX_I
    val_L = new_policy

    lowXS = max(round(meanX_S - errX_S, 0), 0)
    uppXS = min(round(meanX_S + errX_S, 0), env.M)

    # lowXI=max(round(meanX_I-errX_I,0),0)
    # uppXI=min(round(meanX_I+errX_I,0),env.M)

    lowXR = max(round(meanX_R - errX_R, 0), 0)
    uppXR = min(round(meanX_R + errX_R, 0), env.M)

    lowI = max(X_S - uppXS, 0)
    uppI = min(X_S, X_S - lowXS)

    lowR = max(lowXR - (env.M - X_I - X_S), 0)
    uppR = min(X_I, uppXR - (env.M - X_I - X_S))

    for i in range(lowI, uppI):
        for j in range(lowR, uppR):

            probI = poisson.pmf(i, env.beta)
            probR = binom.pmf(j, uppR, env.gamma)

            if i == lowI:
                probI = poisson.cdf(i, env.beta)

            if i == uppI:
                probI = 1 - poisson.cdf(i - 1, env.beta)

            if j == lowR:
                probR = binom.cdf(j, uppR, env.gamma)
            if j == uppR:
                probR = 1 - binom.cdf(j - 1, uppR, env.gamma)

            val_I += 0.97 * probI * probR * currentV_I[X_I + i - j - 1,
                                                       X_S - i - 1]
            val_L += 0.97 * probI * probR * currentV_L[X_I + i - j - 1,
                                                       X_S - i - 1]

    objs = [val_I, val_L]
    return objs
Example #29
    def point_statistics(self, parent_region, test_region, point_region):
        cur = self.con.cursor()
        r = dict()

        basepair_query = 'select basepairs from genomic_region where name=?'

        cur.execute(basepair_query, [self.canonical_name(parent_region)])
        r['basepairs_in_parent_region'] = cur.fetchone()[0]

        cur.execute(basepair_query, [self.canonical_name(parent_region + '%' + test_region)])
        r['basepairs_in_test_region'] = cur.fetchone()[0]

        cur.execute(basepair_query, [self.canonical_name(parent_region + '%' + point_region)])
        r['points_in_parent_region'] = cur.fetchone()[0]

        cur.execute(basepair_query, [self.canonical_name(parent_region + '%' + test_region + '%' + point_region)])
        r['points_in_test_region'] = cur.fetchone()[0]

        
        if r['basepairs_in_parent_region'] == 0:
            r['test_region_basepair_ratio'] = 1
        else:
            r['test_region_basepair_ratio'] = r['basepairs_in_test_region'] / \
                                              r['basepairs_in_parent_region']
        
        if r['points_in_parent_region'] == 0:
            r['test_region_point_count_ratio'] = 1
            r['enrichment_pvalue'] = 1
            r['depletion_pvalue'] = 1
            r['log2_enrichment_ratio'] = 0
            r['enrichment_ratio'] = 1.0
        else:
            r['test_region_point_count_ratio'] = r['points_in_test_region'] / r['points_in_parent_region']
            
            r['depletion_pvalue'] = binom.cdf(k=r['points_in_test_region'], 
                                              n=r['points_in_parent_region'],
                                              p=r['test_region_basepair_ratio'])

            r['enrichment_pvalue'] = 1 - \
                binom.cdf(k=r['points_in_test_region'] - 1,
                          n=r['points_in_parent_region'],
                          p=r['test_region_basepair_ratio'])

            r['enrichment_ratio'] = r['test_region_point_count_ratio'] /\
                                    r['test_region_basepair_ratio']

            if r['points_in_test_region'] == 0:
                r['log2_enrichment_ratio'] = float('-inf')
            else:
                r['log2_enrichment_ratio'] = math.log2(r['enrichment_ratio'])
            
        return r
def main(n, p_0, alpha=1 / 10, outdir='out'):
    testScipy(n, p_0, outdir)
    k_cutoff = binom.ppf(alpha, n, p_0) - 1
    print('k*: {}'.format(k_cutoff))
    print('P(binom < k*): {}'.format(binom.cdf(k_cutoff, n, p_0)))

    fig, ax = plt.subplots(1, 1)
    x = list(range(0, n + 1))
    ax.plot(x, binom.cdf(x, n, p_0), 'ro', ms=5)
    ax.plot([0, n], [alpha, alpha], 'g')
    ax.plot([k_cutoff, k_cutoff], [0, 1], 'b')
    # TODO alpha and k*
    plt.savefig(outdir + "/{}_{}.png".format(n, p_0))
Example #31
def probability_above(fun_resample, gamma, max_samp=None, comm=MPI.COMM_SELF,
                      batch=5, tol=0, bound_significance=0.01, print_per_batch=False,
                      exception_at_max_samp=False, printing=True):
    '''
        Returns True if P(fun_resample()) is significantly above gamma,
        returns False if P(fun_resample()) is significantly below gamma.
        Increases samples size until significance is obtained.
        (null hypothesis is p = gamma).
    '''
    vals = np.zeros((0,))
    s = "gamma = {}".format(gamma)
    while True:
        vals_new_samp = bootstrap(fun_resample, batch, comm=comm, dtype=np.bool_)
        #if True:#gamma == 0.05:
        #    print_all_ranks(comm, str(vals_new_samp))
        #vals_new_samp = vals_new_samp[~np.isnan(vals_new_samp)]
        vals = np.hstack([vals, vals_new_samp.astype(np.float_)])
        upper_bound_pval = binom.cdf(np.sum(vals), len(vals), gamma+tol)
        lower_bound_pval = 1 - binom.cdf(np.sum(vals)-1, len(vals), gamma-tol)

        s += ("\nnp.mean(vals) = {}".format(np.mean(vals)) +
              "\nlen(vals) = {}".format(len(vals)) +
              "\nupper_bound_pval = {}".format(upper_bound_pval) +
              "\nlower_bound_pval = {}".format(lower_bound_pval))
        if upper_bound_pval <= bound_significance:
            s += '\n---'
            if printing:
                print_rank0(comm, s)
            return False  # we have lower bound instead.
        if lower_bound_pval <= bound_significance:
            s += "\n---"
            if printing:
                print_rank0(comm, s)
            return True
        if not max_samp is None:
            if len(vals) > max_samp:
                if exception_at_max_samp:
                    raise MaxSampExceededException
                s += "\n---"+"\n"+"max_samp reached"
                print_rank0(comm, s)
                lower_bound = np.random.rand(1) < 0.5
                lower_bound = comm.bcast(lower_bound)
                if lower_bound:  # 50% chance to be above or below
                    return True
                return False
        batch = len(vals)
        if print_per_batch:
            if printing:
                print_rank0(comm, s)
            s = "gamma = {}".format(gamma)
Example #33
def calculateProbabilities(counts, window_size, length, start=0):
    '''Calculates the probability of observing the counted
    number of reads in windows of "window_size" around each
    cross-linked site.

    Currently assumes that coordinates are in transcript space.

    The length of the transcript must be provided because it
    is needed for calculating the prior and may be outside of the
    provided coordinates.

    Start allows, together with length, for only using part of
    counts.'''

    # limit to subset
    counts = counts[(counts.index.values >= start)
                    & (counts.index.values < (start + length))]
    total_counts = counts.sum()

    # probability is counts-2 because we want P(X>=x), which is
    # 1 - P(X<=x-1). That's one -1. The other -1 comes from the fact
    # that we want the p that any base in the transcript has
    # X>=x, not just this specific one.

    single_base_ps = 1 - binom.cdf(counts - 2, total_counts, 1.0 / length)

    heights = np.zeros(len(counts))

    window_start = np.maximum(0, counts.index.values - window_size)
    window_end = np.minimum(start + length, counts.index.values + window_size)

    ps = (window_end - window_start + 0.0) / length
    ps = ps.astype("float64")

    for i, base in enumerate(counts.index.values):

        try:
            window = counts[window_start[i]:window_end[i]]
        except KeyError:
            print(window_start, window_end)
            print(counts)

        heights[i] = window.sum()

    heights = heights - counts.values
    window_ps = pd.Series(1 - binom.cdf(heights - 1, total_counts, ps),
                          index=counts.index)

    # correct for number of independent windows.
    return window_ps * single_base_ps
def plotBinomCDF():
    # this is just a very useless function, not adjustable, has to be refactored
    tau_p = 80
    k = 100
    p = 0.25
    fig, ax = plt.subplots(1, 1)

    print(binom.cdf(tau_p, k, 0.9))
    print(1 - binom.cdf(tau_p - 1, k, p))

    x = np.arange(0, k)
    ax.plot(x, binom.cdf(x, k, p), 'bo', ms=5, label='binom cdf')
    ax.vlines(x, 0, binom.cdf(x, k, p), colors='b', lw=5, alpha=0.5)

    plt.show()
Example #35
    def equivalence(self, x, n, pd0=0, conf_level=0.95):
        """      
        Equivalence one-tailed test
        
             pc >= pc0
        H0:  pd >= pd0
             d' >= d'0
             
             pc < pc0
        H1:  pd < pd0
             d' < d'0
             
        """
        alpha = 1 - conf_level

        pg = self.method.guessing
        pc = x / n
        pc0 = pg + (1 - pg) * pd0
        pd = (pc - pg) / (1 - pg)
        dprime = fsolve(lambda d: self.method.psychfunc(d) - pc, 1.0)[0]

        p_value = binom.cdf(x, n, pc0)
        xcrit = binom.ppf(alpha, n, pc0) + 1
        power = binom.cdf(xcrit, n, pc)

        pc_err = np.sqrt(pc * (1 - pc) / n)
        pd_err = pc_err / (1 - pg)
        der = derivative(self.method.psychfunc, dprime, dx=1e-6)
        dprime_err = pc_err / der

        # Lower limits
        pc_lower = max(beta.ppf(alpha / 2, x, n - x + 1), pg)
        pd_lower = (pc_lower - pg) / (1 - pg)
        dprime_lower = fsolve(lambda d: self.method.psychfunc(d) - pc_lower,
                              1.0)[0]

        # Upper limits
        pc_upper = min(beta.ppf(1 - alpha / 2, x + 1, n - x), 1.0)
        pd_upper = (pc_upper - pg) / (1 - pg)
        dprime_upper = fsolve(lambda d: self.method.psychfunc(d) - pc_upper,
                              1.0)[0]

        results = TestResults(
            pg, Statistic(pc, pc_err, pc_lower, pc_upper),
            Statistic(pd, pd_err, pd_lower, pd_upper),
            Statistic(dprime, dprime_err, dprime_lower, dprime_upper), p_value,
            alpha, power)
        return results
Example #36
def binomial_ci(mle, N, alpha=0.05):
    """ One sided confidence interval for a binomial test.
    To find the two sided interval, call with (1-alpha/2) and alpha/2 as arguments

    Parameters
    ----------
    mle : float
      Fraction of successes
    N : int
      Number of trials

    If after N trials we obtain mle as the proportion of those
    trials that resulted in success, find c such that

    P(k/N < mle; theta = c) = alpha

    where k/N is the proportion of successes in the set of trials,
    and theta is the success probability for each trial.
    """
    from scipy.stats import binom
    from scipy.optimize import bisect


    to_minimise = lambda c: binom.cdf(mle*N,N,c)-alpha
    return bisect(to_minimise,0,1)
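Per the docstring, a two-sided 95% interval comes from two one-sided calls (illustrative numbers):

lower = binomial_ci(0.3, 100, alpha=0.975)
upper = binomial_ci(0.3, 100, alpha=0.025)
print(lower, upper)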
Example #37
def triplet_prob(Nw,t,ptriplet,psingle):
    weights = np.zeros(Nw+1)
    # number of non-singles must be greater than or equal to t
    for ns in xrange(t,Nw+1):
        weights[ns] = 1.0 - binom.cdf(t-1,ns,ptriplet)
        
    return sum(weights * binom.pmf(np.linspace(0,Nw,Nw+1),Nw,(1.0-psingle)))
def check(N, p):
    global numfails, numchecks, mu, sigma2
    H = NeuronGroup(1, 'v:1', threshold='False', name='H')
    G = NeuronGroup(N, 'v:1', threshold='False', name='G')
    S = Synapses(H, G, on_pre='v+=w', name='S')
    S.connect(p=p)
    m = len(S)
    low, high = binom.interval(alpha, N, p)
    if p==0:
        low = high = 0
    elif p==1:
        low = high = N
    else:
        i = diff(S.j[:])
        i = i[i<isi_max[p]]
        b = bincount(i, minlength=isi_max[p])[:isi_max[p]]
        if b[0]:
            print('Major error: repeated indices for N=%d, p=%.3f' % (N, p))
            raise ValueError("Repeated indices")
        isi[p] += b
        num_isi[p] += sum(b)
    q = binom.cdf(low-0.1, N, p)+binom.sf(high+0.1, N, p)
    mu += q
    sigma2 += q*(1-q)
    numchecks += 1
    if m<low or m>high:
        numfails += 1
        return True
    else:
        return False
Example #39
    def value_picks(picks):
        points =  get_points(picks,data)
        num_picks = len(picks)
        avg_picks = sum(points) / num_picks

        p = {str(n+1) + ' Star': 1 - binom.cdf(n, num_picks, avg_picks) for n in range(5)}
        return p
Example #40
        def clean_value_picks(self):  # return this in a clean format
            points =  get_points(self.picks)
            num_picks = self.quantity
            avg_picks = sum(points) / num_picks

            p = {str(n+1) + ' Star': str(int(100*(1 - binom.cdf(n, num_picks, avg_picks)))) + '%' for n in range(5)}
            return p
Example #41
    def clean_value_picks(picks):
        points =  get_points(picks,data)
        num_picks = len(picks)
        avg_picks = sum(points) / num_picks

        p = {str(n+1) + ' Star': str(int(100*(1 - binom.cdf(n, num_picks, avg_picks)))) + '%' for n in range(5)}
        return p
Example #42
def solve(problem):
    N, X, Y = problem
    if N == 0:
        return 0.0
    i = find_int_by_bisection(s, 1, N, N)
    if is_triangle_diamond(X, Y, i):
        return 1.0  # case A
    if not is_triangle_diamond(X, Y, i + 1):
        return 0.0  # case C
    # case B
    if X == 0:
        return 0.0  # B(1)
    m = s(i + 1) - N - 1
    if m < b(i + 1):
        return binom.cdf(abs(X) - 1, m, 0.5)  # B(2)
    return 1.0 - binom.cdf(b(i + 1) - abs(X) - 1, N - s(i), 0.5)  # B(3)
Example #43
def fraction_disc(results,N,overlap,fig,alpha=None,multiple_correction=False,n_replicates=None):
    """
    Given test results, a number of components N, a level of overlap between
    odorants, a reference figure panel ('a' or 'b'), an optional choice of 
    significance threshold alpha, whether or not to do multiple comparisons
    correction (false discovery rate method), and an optional new number of 
    replicates (subjects or tests), returns an array containing either the 
    fraction of correct responses (if alpha is None) or whether or not that 
    fraction is significantly above chance (if alpha is a number).
    This function assists with generating variants of Figs. 2B, 2C, 3A, 
    and 3B in Bushdid et al.
    """

    assert fig in ['a','b']
    correct,_,_ = correct_matrix(results,N,overlap)
    if fig == 'a':
        dim = 1
    elif fig == 'b':
        dim = 0
    fract_correct = np.mean(correct,dim)
    if alpha is not None:
        if not n_replicates:
            n_replicates = correct.shape[dim] # n_subjects or n_tests.
        ps = 1.0 - binom.cdf(fract_correct*n_replicates,n_replicates,1.0/3)
        if multiple_correction == 'bonferroni':
            alpha = alpha/len(ps)
        if multiple_correction == 'fdr':
            ps = np.array([p*len(ps)/(k+1) for k,p in enumerate(sorted(ps))])
        fract_sig = ps < alpha/2
        return fract_sig
    else:
        return fract_correct
Example #44
    def __init__(self, Year, pvalue = 0.01):

        A =Year.Adj
        v = float(A.sum())

        n, m = A.shape
        self.sets = (n, m)
        alpha = pvalue / float(n * m)
        in_degree = A.sum(0)
        out_degree = A.sum(1)
        i, j, aij = extract.find(A)
        
        nonzero = len(i)
        pij = np.zeros((nonzero, ))
        for h in xrange(nonzero):                      
            pij[h] = out_degree[i[h]] * in_degree[0,j[h]] / v**2
        P = 1-binom.cdf(aij - 1,v,pij)
        data = 1. * (P<= alpha)
        zero_entries = np.where(data == 0) 
        data = np.delete(data, zero_entries)
        i = np.delete(i, zero_entries)
        j = np.delete(j, zero_entries)
        aij = np.delete(aij,zero_entries)
        ij = np.asarray(zip(i,j)).T
        self.svnet = csc_matrix((data, ij))
        self.Adj = csc_matrix((aij,ij))
        self.filename = Year.filename
        self.edgetype = Year.edgetype
        self.banks = Year.banks
        self.firms = Year.firms
        self.descr = 'valid network'
Example #45
def pbinom(x, size=1, prob=0.5, lowertail=True, log=False):
    """
    ============================================================================
                                                                        pbinom()
    ============================================================================
    The cumulative distribution function for the binomial distribution.
    You provide a value along the binomial distribution (eg x=3) or array of
    values, and it returns what proportion of values lie below it (the quantile)

    Alternatively, if you select lowertail=False, it returns the proportion of
    values that are above it.

    USAGE:
    dbinom(x, size, prob=0.5, log=False)
    pbinom(x, size, prob=0.5, lowertail=True, log=False)
    qbinom(q, size, prob=0.5, lowertail=True)
    rbinom(n=1, size=1, prob=0.5)

    :param x:       int. or array of ints. The values along the distribution.
    :param size:    int. Number of trials
    :param prob:    float. Probability of a success
    :param lowertail: bool. are you interested in what proportion of values lie
                     beneath x?
    :param log:     bool. take the log?
    :return:        an array of quantiles() corresponding to the values in x
    ============================================================================
    """
    if lowertail and not log:
        return binom.cdf(x, n=size, p=prob)
    elif not lowertail and not log:
        return binom.sf(x, n=size, p=prob)
    elif lowertail and log:
        return binom.logcdf(x, n=size, p=prob)
    else:
        return binom.logsf(x, n=size, p=prob)
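A quick sanity check of the lowertail switch (assumes from scipy.stats import binom at module level): the two tails of Binomial(10, 0.5) at x=3 sum to 1.

from scipy.stats import binom

print(pbinom(3, size=10, prob=0.5) + pbinom(3, size=10, prob=0.5, lowertail=False))  # 1.0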
    def __init__(self, Year, pvalue = 0.01):

        A = Year.Adj
        
        n, m = A.shape
        alpha = pvalue / n / m
        in_degree = A.sum(0)
        out_degree = A.sum(1)
        i, j, wij = extract.find(A)
        indices = np.where(wij > 0)
        eps = max(wij[indices].min(),0.1)
        v = A.sum() / eps
        
        nonzero = len(i)
        pij = np.zeros((nonzero, ))
        for h in xrange(nonzero):                      
            pij[h] = out_degree[i[h]] * in_degree[0,j[h]] / v**2
        P = 1 - binom.cdf(wij - 1,v,pij)
        data = P <= alpha
        zero_entries = np.where(data == 0)
        data = np.delete(data, zero_entries)
        i = np.delete(i, zero_entries)
        j = np.delete(j, zero_entries)
        wij = np.delete(wij,zero_entries)
        ij = np.asarray(zip(i,j)).T
        self.svnet = csc_matrix((data, ij), shape = (n,m) )
        self.Adj = csc_matrix((wij, ij), shape = (n,m) )
        self.nodes = Year.nodes
        self.filename = Year.filename
        self.edgetype = Year.edgetype
Example #47
def test_simple():
    nentries = 5
    h = FastSampler(nentries, max_value=100, min_value=1)

    weights = array([1,1,3,5,2], dtype='d')
    normalized_weights = weights/sum(weights)

    for i in range(nentries):
        h.add(i, weights[i])
        
    nsamples = 100000
    distro = zeros(nentries)
    
    for i in range(nsamples):
        idx = h.sample()
        distro[idx] += 1
    
    normalized_distro = distro / sum(distro)
    
    print(distro)
    print(weights)
    print(normalized_distro)
    print(normalized_weights)

    # Statistical test on result
    cdf_vals = binom.cdf(k=distro, n=nsamples, p=normalized_weights)
    print "CDF VALS"
    print cdf_vals
    has_bad_vals = any(logical_or(cdf_vals <= 0.005, cdf_vals >= 0.995))
    # Will fail about 5% of the time due to statistics
    assert not has_bad_vals
Example #48
        def value_picks(self): # Value picks from binomial cdf
            points =  self.points
            num_picks = len(points)
            avg_picks = sum(points) / num_picks

            p = {str(n+1) + ' Star': 1 - binom.cdf(n, num_picks, avg_picks) for n in range(5)}
            return p
def optimize_4(N, tx_SNR_range, filepath_down, filepath_up, protocol=4*10**4, downtarget=10**(-9), uptarget=10**(-9)):
    """We enforce a Downlink additive noise target and an Uplink additive noise target. We do NOT assume the blocklength is evenly divided among all three phases. Instead, we allocate the minimum blocklength so that Downlink meets its additive noise target. Then we allocate the minimum blocklength so that Uplink meets its additive noise target. The remaining blocklength is allocated to the XOR phase, which determines the XOR additive noise. The optimization module then finds the transmitter SNR and receiver SNR pair so that the combination of parameters will allow the protocl to meet its overall reliability target.

    Arguments:
      N {int} -- The number of nodes/users total in the control system
      tx_SNR_range {np.arange} -- [description]
      filepath_down {string} -- [description]
      filepath_up {string} -- [description]

    Keyword Arguments:
      protocol {int} -- The length of the entire protocol (all phases combined) in bits (default: {40,000})
      downtarget {float: fraction} -- [description] (default: {10**(-9)})
      uptarget {float: fraction} -- [description] (default: {10**(-9)})

    Returns:
      np.array of length 5 --
      0. The SNR experienced at the receiver (post-fade)
      1. The SNR sent at the transmitter (pre-fade) -- this is what we care about minimizing
      2. The blocklength of the Downlink Phase in bits
      3. The blocklength of the Uplink Phase in bits
      4. The blocklength of the XOR Phase in bits
    """
    downFile = filepath_down + str(N) + '.in'
    downNode = load_table(downFile)
    upFile = filepath_up + str(N) + '.in'
    upNode = load_table(upFile)

    for nominal_SNR in tx_SNR_range:
        for actual_SNR in np.arange(max(-1, nominal_SNR-90), nominal_SNR, 0.1):
            downbit, upbit = float("inf"), float("inf")
            for bit in sorted(downNode.tables.keys()):
                bittable = downNode.tables[bit]
                downfunc = interp1d(bittable[0], bittable[1], kind='linear', bounds_error=False, fill_value=(1.0, 0.0))
                if downfunc(actual_SNR) <= downtarget:
                    downbit = bit
                    break
            for bit in sorted(upNode.tables.keys()):
                bittable = upNode.tables[bit]
                upfunc = interp1d(bittable[0], bittable[1], kind='linear', bounds_error=False, fill_value=(1.0, 0.0))
                if upfunc(actual_SNR) <= uptarget:
                    upbit = bit
                    break
            xorbit = protocol - downbit - upbit
            if xorbit <= 0: continue
#             xorbit = max(0, 4200 - downbit - upbit)
            # We calculate reeddrop each time because the rate changes every time (new table)
            blocklength = int(xorbit / 21 / N)
            rate = N * 160 / xorbit * 7 / 4 if xorbit else float("inf")
            if rate > 1: continue
            k = (1-rate)*blocklength
            pbitdrop = Q(np.sqrt(2*10**(actual_SNR/10)))
            hcerr = 1 - ((1-pbitdrop)**7 + 7*pbitdrop*(1-pbitdrop)**6)
            hcf = 1 - (1-hcerr)**3
            reeddrop = 1-binom.cdf(int(k/2), blocklength, hcf)
            # reeddrop = sum([nCr(blocklength, d)* hcf**d *(1-hcf)**(blocklength-d) for d in range(int(k/2)+1, blocklength)])
            xor_opt = xor_analysis_opt(N, reeddrop, nominal_SNR, actual_SNR, downfunc(actual_SNR), upfunc(actual_SNR))
            if 1-xor_opt <= protocol_target:
                return (actual_SNR, nominal_SNR, downbit, upbit, xorbit, downfunc(actual_SNR), upfunc(actual_SNR), reeddrop)
    return (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan) # default behavior when no parameter combination meets the target
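As noted in the docstring, here is a minimal standalone sketch of the decoding-failure tail computed above, with hypothetical values for blocklength, k and hcf: a block is assumed to decode whenever at most floor(k/2) symbols are corrupted, so the failure probability is the upper binomial tail, which matches the explicit sum left in the comment (scipy.special.comb standing in for nCr).

from scipy.stats import binom
from scipy.special import comb

blocklength, k, hcf = 100, 20, 0.1   # hypothetical values

# Decoding fails when more than k/2 symbols are corrupted.
reeddrop = 1 - binom.cdf(int(k / 2), blocklength, hcf)
# Same quantity as an explicit sum over all failing corruption counts.
explicit = sum(comb(blocklength, d) * hcf**d * (1 - hcf)**(blocklength - d)
               for d in range(int(k / 2) + 1, blocklength + 1))
assert abs(reeddrop - explicit) < 1e-12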
Exemple #50
0
def probability_in_interval(fun_resample, gamma_lower, gamma_upper,
                            significance_first=0.01, significance_second=0.05,
                            batch=5, comm=MPI.COMM_SELF,
                            print_per_batch=False, printing=True):
    N_test_max = 20000
    vals = np.zeros((0,))
    s = "gamma_lower, gamma_upper = {}, {}".format(gamma_lower, gamma_upper)
    while True:
        vals_new_samp = bootstrap(fun_resample, batch, comm=comm)
        vals = np.hstack([vals, vals_new_samp])
        upper_bound_pval = binom.cdf(np.sum(vals), len(vals), gamma_upper)
        lower_bound_pval = 1 - binom.cdf(np.sum(vals)-1, len(vals), gamma_lower)
        s += ("\nnp.mean(vals) = {}".format(np.mean(vals)) +
              "\nlen(vals) = {}".format(len(vals)) +
              "\nupper_bound_pval = {}".format(upper_bound_pval) +
              "\nlower_bound_pval = {}".format(lower_bound_pval))
        if upper_bound_pval < significance_first:
            if lower_bound_pval < significance_second:
                s += '\n===\nin interval\n==='
                if printing:
                    print_rank0(comm, s)
                return 'in interval'
            if 1-binom.cdf(int(np.round(np.mean(vals)*N_test_max))-1, N_test_max, gamma_lower) < significance_second:
                batch = len(vals)
                continue  # Expecting less than N_test_max tests to verify lower bound
            s += '\n===\nbelow upper bound\n==='
            if printing:
                print_rank0(comm, s)
            return 'below upper bound'
        if lower_bound_pval < significance_first:
            if upper_bound_pval < significance_second:
                s += '\n===\nin interval\n==='
                if printing:
                    print_rank0(comm, s)
                return 'in interval'
            if binom.cdf(int(np.round(np.mean(vals)*N_test_max)), N_test_max, gamma_upper) < significance_second:
                batch = len(vals)
                continue  # Expecting less than N_test_max tests to verify upper bound
            s += '\n===\nabove lower bound\n==='
            if printing:
                print_rank0(comm, s)
            return 'above lower bound'
        batch = len(vals)
        if print_per_batch:
            if printing:
                print_rank0(comm, s)
            s = "gamma_lower, gamma_upper = {}, {}".format(gamma_lower, gamma_upper)
Exemple #51
0
def binomial_distribution_vectors(n, p, v_size=None):
    pmf = np.array([binom.pmf(i, n, p) for i in range(n+1)])
    cdf = np.array([binom.cdf(i, n, p) for i in range(n+1)])
    if (v_size is not None) and (v_size > n+1):
        # pad on the right so both vectors have exactly v_size entries
        pmf = np.append(pmf, np.zeros(v_size-(n+1)))
        cdf = np.append(cdf, np.ones(v_size-(n+1)))
    return pmf, cdf
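A quick usage check with hypothetical sizes (assuming numpy as np and scipy.stats.binom are in scope, as the snippet itself does): without padding both vectors have n+1 entries; with v_size set they are extended on the right to exactly v_size entries.

pmf, cdf = binomial_distribution_vectors(4, 0.5)
print(len(pmf), len(cdf))          # 5 5
print(cdf[-1])                     # 1.0

pmf_p, cdf_p = binomial_distribution_vectors(4, 0.5, v_size=8)
print(len(pmf_p), len(cdf_p))      # 8 8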
Exemple #52
0
def uBinBound(m,k,delta=0.05):
    """
    Calculates the upper bound of the risk,
    using the binomial tail approach of Langford
    """
    if k == m: return 1.
    else:
        f = lambda x: binom.cdf( k, m, x ) - delta
        return bisect( f, float(k)/m, 1.0 )
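A short usage check with hypothetical counts (assuming scipy.stats.binom and scipy.optimize.bisect are in scope, as the snippet requires): the returned bound is the largest risk still consistent with observing k errors in m samples at confidence 1 - delta.

p_up = uBinBound(100, 3, delta=0.05)
# By construction, binom.cdf(3, 100, p_up) is (approximately) delta.
print(p_up, binom.cdf(3, 100, p_up))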
Exemple #53
0
def detect_link(self, other, threshold, loss_probability, incoming=True, nmax=0):
    difference = self.difference(other)
    if difference < nmax:
        return False
    if incoming:
        received = self.second_moment()
        sent = received + difference
    else:
        sent = self.second_moment()
        received = self.second_moment() - difference
    return binom.cdf(received, round(sent), 1 - loss_probability) < threshold
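The final line above is a one-sided binomial check: how likely is it to receive this few packets or fewer, given the number sent and the assumed loss probability. A standalone sketch of that check with hypothetical packet counts:

from scipy.stats import binom

sent, received = 1000, 850      # hypothetical packet counts
loss_probability = 0.1
threshold = 0.01

# Probability of receiving this few packets or fewer if each packet
# independently survives with probability 1 - loss_probability.
p = binom.cdf(received, round(sent), 1 - loss_probability)
print(p, p < threshold)         # detect_link returns True exactly when p < threshold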
Exemple #54
0
def pbinom(k, n):
  """
  Compute cdf for binomial with prob = 0.5
  compare to R pbinom
  :param k: number of successes
  :param n: number of trials
  :return: cumulative probability P(X <= k)
  """

  return binom.cdf(k, n, 0.5)
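A quick check against R (assuming scipy.stats.binom is imported, as the snippet requires): pbinom(3, 10) here matches pbinom(3, 10, 0.5) in R.

print(pbinom(3, 10))   # 0.171875, the same value R reports for pbinom(3, 10, 0.5)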
Exemple #55
0
def binomialTailTest(counts, nTrials, pEvent, oneSided=True):
  
  counts = array(counts)
  
  mean = nTrials * pEvent
  
  if oneSided:
    result = zeros(counts.shape)
    isAboveMean = counts > mean
    aboveIdx = isAboveMean.nonzero()
    belowIdx = (~isAboveMean).nonzero()
    result[aboveIdx] = binom.sf(counts[aboveIdx]-1, nTrials, pEvent)
    result[belowIdx] = binom.cdf(counts[belowIdx], nTrials, pEvent)
    
  else:
    diffs = abs(counts-mean)
    result = binom.cdf(mean-diffs, nTrials, pEvent)
    result += binom.sf(mean+diffs-1, nTrials, pEvent)
    
  return result
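A short usage check with hypothetical counts (assuming `from numpy import array, zeros` and `from scipy.stats import binom` are in scope, as the snippet requires): for a count above the expected mean, the one-sided p-value is the upper tail P(X >= count).

print(binomialTailTest([8], 10, 0.5))   # [0.0546875] == P(X >= 8) for Binomial(10, 0.5)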
def prob_node_vs_community(CTC,NTC, M):

    I = np.diag(np.ones((len(CTC), )))
    CTC = np.multiply(CTC, 1 - I)
    NTC = np.multiply(NTC, 1 - M)
    ext_in_degree = CTC.sum(0)
    v = NTC.sum()
    ext_out_degree = NTC.sum(1)
    P = ext_out_degree * ext_in_degree / v**2
    P = 1 - binom.cdf(NTC-1,v,P)      

    return P
Exemple #57
0
    def simprob(self, avector):
        # Binomial probability of getting a similarity of sim or less by chance,
        # given the widths of the two vectors.

        mywidth = self.width
        awidth = avector.width
        # width = (mywidth + awidth) * 0.5
        n = awidth
        p = mywidth / WordVector.dim
        sim = self.linsim(avector)
        r = math.floor(sim * awidth)
        prob = binom.cdf(r, n, p)
        return prob
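The same calculation as a standalone sketch with hypothetical dimensions: if one vector has m of D positions active, each of the other vector's a active positions overlaps it by chance with probability m/D, so the probability of seeing r or fewer overlaps is a binomial CDF. (D, m, a and sim below are made-up values.)

import math
from scipy.stats import binom

D, m, a = 1000, 100, 80     # hypothetical dimension and vector widths
sim = 0.05                  # hypothetical observed similarity
r = math.floor(sim * a)     # implied number of overlapping positions

prob = binom.cdf(r, a, m / D)   # P(overlap <= r) under the chance model
print(prob)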
def BinomialErrors_old(nobs, Nsamp, alpha=0.16):
    """
    One sided confidence interval for a binomial test.

    If after Nsamp trials we obtain nobs
    trials that resulted in success, find c such that

    P(nobs/Nsamp < mle; theta = c) = alpha

    where theta is the success probability for each trial. 

    Code stolen shamelessly from stackoverflow: 
    http://stackoverflow.com/questions/13059011/is-there-any-python-function-library-for-calculate-binomial-confidence-intervals
    """
    from scipy.stats import binom
    from scipy.optimize import bisect

    p0 = float(nobs) / float(Nsamp)
    upper_errfcn = lambda c: binom.cdf(nobs, Nsamp, c) - alpha
    lower_errfcn = lambda c: binom.cdf(nobs, Nsamp, c) - (1.0 - alpha)
    return p0, bisect(lower_errfcn, 0, 1), bisect(upper_errfcn, 0, 1)
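A short usage check with hypothetical counts: for 5 successes in 100 trials at the default alpha=0.16, the point estimate is 0.05 and the two one-sided bounds bracket it.

p0, low, high = BinomialErrors_old(5, 100)
print(p0)               # 0.05
print(low < p0 < high)  # True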
def calculateSDRFalseMatchError(kVal,
                                thetaVal=20,
                                nVal=2048,
                                wVal=40,
                                mVal=10,
                                cVal=5):
  numCellsInUnionVal = calculateNumCellsVsK(kVal, nVal, cVal, mVal)

  pMatchBit = float(numCellsInUnionVal)/ (nVal * mVal)

  pFalseMatch = 1 - binom.cdf(thetaVal, wVal, pMatchBit)
  return pFalseMatch
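The final step above is again an upper binomial tail: with wVal active bits, each matching a bit of the union by chance with probability pMatchBit, a false match occurs when more than thetaVal of them match. A standalone sketch with a hypothetical per-bit match probability (independent of calculateNumCellsVsK):

from scipy.stats import binom

wVal, thetaVal = 40, 20
pMatchBit = 0.2              # hypothetical per-bit match probability

# Probability that more than thetaVal of the wVal bits match by chance.
pFalseMatch = 1 - binom.cdf(thetaVal, wVal, pMatchBit)
print(pFalseMatch)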