def posterior(x, n, p1, p2):
    """
    Calculates the posterior probability that the probability of developing
    severe side effects falls within a specific range given the data.

    A uniform prior combined with a binomial likelihood yields a
    Beta(x + 1, n - x + 1) posterior, so the answer is that distribution's
    CDF evaluated at p2 minus its CDF evaluated at p1.

    :param x: number of patients that develop severe side effects
    :param n: total number of patients observed
    :param p1: is the lower bound on the range
    :param p2: is the upper bound on the range
    :return: the posterior probability that p is within the range
        [p1, p2] given x and n
    :raises ValueError: if n is not a positive int, x is not a
        non-negative int, x > n, p1 or p2 is not a float in [0, 1],
        or p2 <= p1
    """
    # Exact type checks: subclasses of int/float (e.g. bool) are rejected.
    if type(n) is not int or n <= 0:
        raise ValueError("n must be a positive integer")
    if type(x) is not int or x < 0:
        raise ValueError("x must be an integer that is greater than or equal "
                         "to 0")
    if x > n:
        raise ValueError("x cannot be greater than n")
    for label, bound in (("p1", p1), ("p2", p2)):
        if type(bound) is not float or bound < 0 or bound > 1:
            raise ValueError(
                "{} must be a float in the range [0, 1]".format(label))
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # special.betainc is the regularized incomplete beta function (the Beta
    # CDF). It replaces special.btdtr, which was deprecated in SciPy 1.12
    # and is absent from later releases.
    shape_a = x + 1
    shape_b = n - x + 1
    cdf_hi = special.betainc(shape_a, shape_b, p2)
    cdf_lo = special.betainc(shape_a, shape_b, p1)
    return cdf_hi - cdf_lo
def posterior(x, n, p1, p2):
    """Return the posterior probability that p lies within [p1, p2].

    The posterior used is Beta(x + 1, n - x + 1); the result is the
    difference of its CDF at p2 and at p1.

    Args:
        x: number of observed successes; must be an int >= 0 and <= n.
        n: total number of observations; must be a positive int.
        p1: lower bound of the range; a float in [0, 1].
        p2: upper bound of the range; a float in [0, 1], greater than p1.

    Raises:
        ValueError: n is not a positive integer.
        ValueError: x is not an integer greater than or equal to 0.
        ValueError: x is greater than n.
        ValueError: p1 is not a float in [0, 1].
        ValueError: p2 is not a float in [0, 1].
        ValueError: p2 is not greater than p1.

    Returns:
        The probability mass of the Beta(x + 1, n - x + 1) distribution
        between p1 and p2.
    """
    if not isinstance(n, int) or n < 1:
        raise ValueError('n must be a positive integer')
    # Previously x was never validated, so negative or non-integer values
    # reached the beta CDF and produced nan / meaningless results.
    if not isinstance(x, int) or x < 0:
        raise ValueError(
            'x must be an integer that is greater than or equal to 0')
    if x > n:
        raise ValueError('x cannot be greater than n')
    if not isinstance(p1, float) or p1 < 0 or p1 > 1:
        raise ValueError('p1 must be a float in the range [0, 1]')
    if not isinstance(p2, float) or p2 < 0 or p2 > 1:
        raise ValueError('p2 must be a float in the range [0, 1]')
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # betainc is the Beta CDF; it replaces special.btdtr, which was
    # deprecated in SciPy 1.12 and is absent from later releases.
    successes, failures = x + 1, n - x + 1
    return (special.betainc(successes, failures, p2)
            - special.betainc(successes, failures, p1))
def posterior(x, n, p1, p2):
    """
    calculates the posterior probability that the side-effect probability p
    lies in the range [p1, p2], given that x of n patients developed severe
    side effects

    x: number of patients that develop side effects
    n: total patients
    p1: lower bound of the range
    p2: upper bound of the range
    Returns: posterior prob that p is in range [p1, p2] given x and n
    Raises: ValueError with a descriptive message when n, x, p1 or p2
        fails validation
    """
    # n and x use exact type checks (bool is rejected); p1/p2 accept float
    # subclasses.
    if type(n) is not int or n <= 0:
        raise ValueError("n must be a positive integer")
    if type(x) is not int or x < 0:
        raise ValueError(
            "x must be an integer that is greater than or equal to 0")
    if x > n:
        raise ValueError("x cannot be greater than n")
    if not isinstance(p1, float) or p1 < 0 or p1 > 1:
        raise ValueError("p1 must be a float in the range [0, 1]")
    if not isinstance(p2, float) or p2 < 0 or p2 > 1:
        raise ValueError("p2 must be a float in the range [0, 1]")
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # For a binomial likelihood the conjugate prior is beta, so the
    # posterior p(p | x, n) is also beta, here with shapes (1 + x, 1 + n - x).
    # betainc is the beta CDF; it replaces special.btdtr, which was
    # deprecated in SciPy 1.12 and is absent from later releases.
    shape_a = 1 + x
    shape_b = 1 + n - x
    lower, upper = special.betainc(shape_a, shape_b, [p1, p2])
    return upper - lower
def posterior(x, n, p1, p2):
    """
    calculates the posterior probability that p lies within a range

    Uniform prior + binomial likelihood => Beta(x + 1, n - x + 1)
    posterior; the result is its CDF at p2 minus its CDF at p1.

    Args:
        x: is the number of patients that develop severe side effects
        n: is the total number of patients observed
        p1: is the lower bound on the range
        p2: is the upper bound on the range

    Returns:
        the posterior probability that p is within the range [p1, p2]
        given x and n

    Raises:
        ValueError: if n is not a positive int, x is not a non-negative
            int, x > n, p1 or p2 is not a float in [0, 1], or p2 <= p1
    """
    if type(n) is not int or n <= 0:
        raise ValueError("n must be a positive integer")
    if type(x) is not int or x < 0:
        raise ValueError(
            "x must be an integer that is greater than or equal to 0")
    if x > n:
        raise ValueError("x cannot be greater than n")
    # The chained comparison also rejects nan.
    if type(p1) is not float or not 0 <= p1 <= 1:
        raise ValueError("p1 must be a float in the range [0, 1]")
    if type(p2) is not float or not 0 <= p2 <= 1:
        raise ValueError("p2 must be a float in the range [0, 1]")
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # betainc is the Beta CDF; it replaces special.btdtr, which was
    # deprecated in SciPy 1.12 and is absent from later releases.
    shapes = (x + 1, n - x + 1)
    return special.betainc(*shapes, p2) - special.betainc(*shapes, p1)
def posterior(x, n, p1, p2):
    """
    Calculates the posterior probability that the probability of developing
    severe side effects falls within a specific range given the data.

    x is the number of affected patients, n the number observed, and
    [p1, p2] the range of interest. The value returned is the mass of a
    Beta(x + 1, n - x + 1) distribution between p1 and p2.

    Raises ValueError when n is not a positive int, x is not a non-negative
    int, x > n, p1 or p2 is not a float in [0, 1], or p2 <= p1.
    """
    n_ok = type(n) is int and n > 0
    if not n_ok:
        raise ValueError("n must be a positive integer")
    x_ok = type(x) is int and x >= 0
    if not x_ok:
        raise ValueError("x must be an integer that is greater than or equal "
                         "to 0")
    if x > n:
        raise ValueError("x cannot be greater than n")
    if type(p1) is not float or p1 < 0 or p1 > 1:
        raise ValueError("p1 must be a float in the range [0, 1]")
    if type(p2) is not float or p2 < 0 or p2 > 1:
        raise ValueError("p2 must be a float in the range [0, 1]")
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # betainc is the Beta CDF; it replaces special.btdtr, which was
    # deprecated in SciPy 1.12 and is absent from later releases.
    def beta_cdf(point):
        return special.betainc(x + 1, n - x + 1, point)

    return beta_cdf(p2) - beta_cdf(p1)
def _is_whole_number(value):
    """Return True for ints and for floats that have no fractional part."""
    if isinstance(value, float):
        return value.is_integer()
    return isinstance(value, int)


def posterior(x, n, p1, p2):
    """
    Calculates the posterior probability that the probability of developing
    severe side effects lies within [p1, p2] given the data.

    x is the number of patients that develop severe side effects
    n is the total number of patients observed
    p1 is the lower bound on the range
    p2 is the upper bound on the range

    Returns the mass of a Beta(x + 1, n - x + 1) distribution between p1
    and p2.

    Raises:
        ValueError: n is not a positive whole number, x is not a
            non-negative whole number, x > n, or p2 <= p1
        TypeError: p1 or p2 is not a float in [0, 1] (the exception type is
            kept as-is so existing callers that catch TypeError still work)
    """
    # Ints and integral floats (e.g. 10.0) are accepted as before, but
    # fractional values such as 2.5 no longer pass as "integers".
    if not _is_whole_number(n) or n <= 0:
        raise ValueError("n must be a positive integer")
    if not _is_whole_number(x) or x < 0:
        raise ValueError(
            "x must be an integer that is greater than or equal to 0")
    if x > n:
        raise ValueError("x cannot be greater than n")
    if type(p1) is not float or p1 < 0 or p1 > 1:
        raise TypeError("p1 must be a float in the range [0, 1]")
    if type(p2) is not float or p2 < 0 or p2 > 1:
        raise TypeError("p2 must be a float in the range [0, 1]")
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # betainc is the Beta CDF; it replaces special.btdtr, which was
    # deprecated in SciPy 1.12 and is absent from later releases.
    cdf_at_p1 = special.betainc(x + 1, n - x + 1, p1)
    cdf_at_p2 = special.betainc(x + 1, n - x + 1, p2)
    return cdf_at_p2 - cdf_at_p1
def apply_statstest(df_highcounts, df_lowcounts, correlation_value):
    """Run the normality check and calculate the p-value for the
    correlation value

    The p-value is twice the CDF of a symmetric beta distribution with
    shape ``len(df_highcounts.values) / 2 - 1`` evaluated at
    ``0.5 * (1 - |correlation_value|)``. It is printed and returned
    regardless of the outcome of the normality check.

    Parameters
    ----------
    df_highcounts : pandas.Dataframe
    df_lowcounts : pandas.Dataframe
    correlation_value : float

    Returns
    -------
    prob : float
        probability for the correlation value
    """
    # check if data is normal distributed
    ps_hc, ps_lc = check_data_normaldist(df_highcounts, df_lowcounts)
    both_at_or_below_alpha = bool((ps_hc <= 0.05) & (ps_lc <= 0.05))

    # The same computation was duplicated in both branches; it does not
    # depend on the normality result. betainc is the beta CDF and replaces
    # special.btdtr (deprecated in SciPy 1.12, absent from later releases).
    ab = len(df_highcounts.values) / 2 - 1
    prob = 2 * special.betainc(
        ab, ab, 0.5 * (1 - abs(np.float64(correlation_value))))
    print(prob)

    # NOTE(review): the warning fires when at least one value is > 0.05.
    # If check_data_normaldist returns normality-test p-values, small values
    # indicate NON-normal data, so this condition may be inverted — confirm
    # against check_data_normaldist before changing.
    if not both_at_or_below_alpha:
        print('Causion: Not normally distributed data')
    return prob
def vcf_graph(rows, pop, binsize, title, filename):
    """Fit a beta distribution to ac/an ratios and compare it to the data.

    The ratio ``rows["ac_<pop>"] / rows["an_<pop>"]`` is computed, values
    outside the open interval (0, 1) are dropped, and a beta distribution
    with ``loc=0`` and ``scale=1`` is fitted. A figure is built with a
    probability-normalised histogram of the ratios and a bar trace of the
    fitted distribution's probability mass per bin, and two
    Kolmogorov-Smirnov tests are run.

    Parameters
    ----------
    rows : mapping of column name -> series supporting ``.divide`` and
        boolean-mask indexing (presumably a pandas DataFrame — confirm)
    pop : value interpolated into the ``ac_%s`` / ``an_%s`` column names
    binsize : bin width used for both the histogram and the theory bars
    title : figure title
    filename : currently unused (image export is disabled below)

    Returns
    -------
    tuple
        ``(alpha, beta, ks_exp_statistic, ks_exp_pvalue,
        ks_neutral_statistic, ks_neutral_pvalue, sample_size)``
    """
    # betainc is the beta CDF; it replaces btdtr, which was deprecated in
    # SciPy 1.12 and is absent from later releases.
    from scipy.special import betainc

    expx = rows["ac_%s" % pop].divide(rows["an_%s" % pop])
    expx = expx[((expx > 0) & (expx < 1))]
    alphax, betax, _, _ = beta.fit(expx, floc=0, fscale=1)

    # Bin edges; np.arange excludes the stop value 1.
    x = np.arange(0, 1, binsize)
    edge_pairs = list(zip(x[:-1], x[1:]))
    binx = [(right + left) / 2 for left, right in edge_pairs]
    # Probability mass of the fitted distribution inside each bin.
    y = [
        betainc(alphax, betax, right) - betainc(alphax, betax, left)
        for left, right in edge_pairs
    ]

    fig = go.Figure()
    fig.add_trace(
        go.Histogram(x=expx,
                     histnorm='probability',
                     name="Experimental",
                     autobinx=False,
                     xbins=dict(start=0, end=1, size=binsize),
                     opacity=.9))
    fig.add_trace(go.Bar(x=binx, y=y, name="Theory", opacity=.9))
    fig.update_layout(autosize=False,
                      width=800,
                      height=600,
                      yaxis=go.layout.YAxis(title_text="P(x)", range=[0, 1]),
                      xaxis=go.layout.XAxis(title_text="x", range=[0, 1]),
                      title_text=title,
                      legend_orientation="h")
    # Image export is disabled, so ``filename`` is not used:
    # fig.write_image(filename)

    # KS test of the observed ratios against the fitted beta distribution.
    ksexp = kstest(expx, 'beta', args=(alphax, betax))
    # NOTE(review): presumably draws N samples from the fitted beta and
    # tests them against the same distribution; the ('beta', False) call
    # form depends on the installed SciPy version — confirm.
    ksneut = kstest('beta', False, args=(alphax, betax), N=expx.size)
    return (alphax, betax, ksexp.statistic, ksexp.pvalue, ksneut.statistic,
            ksneut.pvalue, expx.size)
def posterior(x, n, p1, p2):
    """calculates the posterior probability that the probability of
    developing severe side effects falls within a specific range given
    the data:
        -> x is the number of patients that develop severe side effects
        -> n is the total number of patients observed
        -> p1 is the lower bound on the range
        -> p2 is the upper bound on the range
        -> The prior beliefs of p are assumed to follow a uniform
           distribution
        -> Returns: the posterior probability that p is within the range
           [p1, p2] given x and n
        -> Raises: ValueError if n is not a positive integer, x is not a
           non-negative integer, x > n, p1 or p2 is not a float in
           [0, 1], or p2 <= p1
    """
    if not isinstance(n, int) or n <= 0:
        raise ValueError('n must be a positive integer')
    if not isinstance(x, int) or x < 0:
        raise ValueError(
            'x must be an integer that is greater than or equal to 0')
    if x > n:
        raise ValueError('x cannot be greater than n')
    if not isinstance(p1, float) or p1 < 0 or p1 > 1:
        raise ValueError('p1 must be a float in the range [0, 1]')
    if not isinstance(p2, float) or p2 < 0 or p2 > 1:
        raise ValueError('p2 must be a float in the range [0, 1]')
    if p2 <= p1:
        raise ValueError('p2 must be greater than p1')

    # The binomial distribution is the PMF of k successes given n
    # independent events each with a probability p of success.
    # Mathematically, when alpha = k + 1 and beta = n - k + 1, the beta
    # distribution and the binomial distribution are related by a factor
    # of n + 1.
    # https://en.wikipedia.org/wiki/Binomial_distribution
    shape_alpha = x + 1
    shape_beta = n - x + 1

    # betainc is the beta CDF; it replaces special.btdtr, which was
    # deprecated in SciPy 1.12 and is absent from later releases.
    cdf_low = special.betainc(shape_alpha, shape_beta, p1)
    cdf_high = special.betainc(shape_alpha, shape_beta, p2)
    # Difference of two CDF values: a probability, not a density.
    return cdf_high - cdf_low
def posterior(x, n, p1, p2):
    """
    Posterior probability that p lies in [p1, p2] under a uniform prior.

    * x is the number of patients that develop severe side effects
    * n is the total number of patients observed
    * p1 is the lower bound on the range
    * p2 is the upper bound on the range
    * The prior beliefs of p are assumed to follow a uniform distribution

    Raises ValueError with the message:
    * "n must be a positive integer" if n is not a positive integer
    * "x must be an integer that is greater than or equal to 0" if x is
      not an integer that is greater than or equal to 0
    * "x cannot be greater than n" if x is greater than n
    * "{p} must be a float in the range [0, 1]" if p1 or p2 is not a float
      within the range [0, 1], where {p} is the corresponding variable
    * "p2 must be greater than p1" if p2 <= p1

    Returns: the posterior probability that p is within the range [p1, p2]
    given x and n
    """
    if type(n) is not int or n <= 0:
        raise ValueError('n must be a positive integer')
    if type(x) is not int or x < 0:
        raise ValueError(
            'x must be an integer that is greater than or equal to 0')
    if x > n:
        raise ValueError('x cannot be greater than n')
    if type(p1) is not float or p1 < 0 or p1 > 1:
        raise ValueError('p1 must be a float in the range [0, 1]')
    if type(p2) is not float or p2 < 0 or p2 > 1:
        raise ValueError('p2 must be a float in the range [0, 1]')
    if p2 <= p1:
        raise ValueError('p2 must be greater than p1')

    # Posterior is Beta(x + 1, n - x + 1). betainc is its CDF and replaces
    # special.btdtr (deprecated in SciPy 1.12, absent from later releases).
    beta_params = (x + 1, n - x + 1)
    upper_cdf = special.betainc(*beta_params, p2)
    lower_cdf = special.betainc(*beta_params, p1)
    return upper_cdf - lower_cdf
def cdf(self, x):
    """
    Computes the cumulative distribution function of the distribution at
    the point(s) x. The cdf is defined as follows (where alpha->a,
    beta->b, Fbetainc is the incomplete beta function and Fbeta is the
    complete beta function):

    F(x|a, b) = Fbetainc(a, b, x) / Fbeta(a, b)

    Parameters
    ----------
    x: array, dtype=float, shape=(m x n)
        The value(s) at which the user would like the cdf evaluated.
        If an array is passed in, the cdf is evaluated at every point
        in the array and an array of the same size is returned.

    Returns
    -------
    cdf: array, dtype=float, shape=(m x n)
        The cdf at each point in x.
    """
    # scipy.special.betainc is the regularized incomplete beta function,
    # i.e. the ratio above. It replaces btdtr, which was deprecated in
    # SciPy 1.12 and is absent from later releases.
    from scipy.special import betainc

    return betainc(self.alpha, self.beta, x)
def _correl_pvalue(r, n, k=0): """Compute the two-sided p-value of a correlation coefficient. https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#Using_the_exact_distribution Parameters ---------- r : float Correlation coefficient. n : int Sample size k : int Number of covariates for (semi)-partial correlation. Returns ------- pval : float Two-tailed p-value. Notes ----- This uses the same approach as :py:func:`scipy.stats.pearsonr` to calculate the p-value (i.e. using a beta distribution) """ from scipy.special import btdtr # Method 1: using a student T distribution # dof = n - k - 2 # tval = r * np.sqrt(dof / (1 - r**2)) # pval = 2 * t.sf(abs(tval), dof) # Method 2: beta distribution (similar to scipy.stats.pearsonr, faster) ab = (n - k) / 2 - 1 pval = 2 * btdtr(ab, ab, 0.5 * (1 - abs(np.float64(r)))) return pval
def _p_value(corr: float, observation_length: int) -> float: ab = observation_length / 2 - 1 if ab == 0: p_value = 1.0 else: p_value = 2 * btdtr(ab, ab, 0.5 * (1 - abs(np.float64(corr)))) return p_value
def posterior(x, n, p1, p2):
    """
    Calculates the posterior probability that the probability of developing
    severe side effects falls within a specific range given the data.

    @x: is the number of patients that develop severe side effects
    @n: is the total number of patients observed
    @p1: is the lower bound on the range
    @p2: is the upper bound on the range

    The prior beliefs of p are assumed to follow a uniform distribution.

    Raises ValueError with the message:
    *** "n must be a positive integer" if n is not a positive integer
    *** "x must be an integer that is greater than or equal to 0" if x is
        not an integer that is greater than or equal to 0
    *** "x cannot be greater than n" if x is greater than n
    *** "{p} must be a float in the range [0, 1]" if p1 or p2 is not a
        float within the range [0, 1], where {p} is the corresponding
        variable
    *** "p2 must be greater than p1" if p2 <= p1

    Returns: the posterior probability that p is within the range
    [p1, p2] given x and n
    """
    if type(n) is not int or n < 1:
        raise ValueError("n must be a positive integer")
    if type(x) is not int or x < 0:
        raise ValueError(
            "x must be an integer that is greater than or equal to 0")
    if x > n:
        raise ValueError("x cannot be greater than n")
    if (not isinstance(p1, float)) or p1 < 0 or p1 > 1:
        raise ValueError("p1 must be a float in the range [0, 1]")
    if (not isinstance(p2, float)) or p2 < 0 or p2 > 1:
        raise ValueError("p2 must be a float in the range [0, 1]")
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # Posterior is Beta(x + 1, n - x + 1). betainc is its CDF and replaces
    # special.btdtr (deprecated in SciPy 1.12, absent from later releases).
    a = x + 1
    b = n - x + 1
    cdf_p1 = special.betainc(a, b, p1)
    cdf_p2 = special.betainc(a, b, p2)
    # The expression must sit on the same line as ``return``; when split
    # onto the next line the function returns None.
    return cdf_p2 - cdf_p1
def _sf_single(self, x, n, a, b): k = floor(x) p = linspace(0, 1, num=10001) bta = btdtr(a, b, p) p_med = (p[:-1] + p[1:]) / 2 bta_med = bta[1:] - bta[:-1] vals = (bdtrc(k, n, p_med) * bta_med).sum(axis=-1) return vals
def ztnb_cdf(y, mu, alpha):
    """Evaluate a zero-truncated CDF built from the incomplete beta function.

    With ``r = 1 / alpha`` and ``p = mu / (mu + r)`` the value returned is

        (1 - I_p(y + 1, r) - (1 - p)**r) / (1 - (1 - p)**r)

    where ``I`` is the regularized incomplete beta function.

    NOTE(review): judging by the name this is the CDF of a zero-truncated
    negative binomial with mean ``mu`` and dispersion ``alpha`` — confirm
    with the caller.

    Raises:
        ValueError: if ``y`` is not larger than 0. (ValueError is a subclass
            of the generic Exception raised previously, so existing
            handlers still catch it.)
    """
    r = 1.0 / alpha
    if y <= 0:
        raise ValueError('y must be larger than 0.')
    p = mu / (mu + r)
    # (1 - p)**r is the term removed by the zero truncation; computed once.
    zero_term = np.power(1 - p, r)
    # betainc replaces special.btdtr (deprecated in SciPy 1.12, absent from
    # later releases); the two compute the same function.
    untruncated = 1 - special.betainc(y + 1, r, p)
    return (untruncated - zero_term) / (1 - zero_term)
def posterior(x, n, p1, p2):
    """
    calculates the posterior probability that the probability of developing
    severe side effects falls within a specific range given the data

    :param x: number of patients that develop severe side effects
    :param n: total number of patients observed
    :param p1: lower bound on the range
    :param p2: upper bound on the range
    :return: posterior probability that p is within the range [p1, p2]
        given x and n
    :raises ValueError: if n is not a positive integer, x is not a
        non-negative integer, x > n, p1 or p2 is not a float in [0, 1],
        or p2 <= p1
    """
    # Each entry is (is_invalid, message); the first invalid one is raised.
    # Conditions are lambdas so later checks are only evaluated once the
    # earlier ones have passed (e.g. x > n only after x is known to be int).
    checks = (
        (lambda: not isinstance(n, int) or n < 1,
         'n must be a positive integer'),
        (lambda: not isinstance(x, int) or x < 0,
         'x must be an integer that is greater than or equal to 0'),
        (lambda: x > n,
         'x cannot be greater than n'),
        (lambda: not isinstance(p1, float) or p1 < 0 or p1 > 1,
         'p1 must be a float in the range [0, 1]'),
        (lambda: not isinstance(p2, float) or p2 < 0 or p2 > 1,
         'p2 must be a float in the range [0, 1]'),
        (lambda: p2 <= p1,
         'p2 must be greater than p1'),
    )
    for is_invalid, err in checks:
        if is_invalid():
            raise ValueError(err)

    # Cumulative distribution function of Beta(x + 1, n - x + 1). betainc
    # replaces special.btdtr (deprecated in SciPy 1.12, absent from later
    # releases).
    a = x + 1
    b = n - x + 1
    return special.betainc(a, b, p2) - special.betainc(a, b, p1)
def posterior(x, n, p1, p2):
    """posterior probability within a specific range given the data

    x is the number of successes out of n observations and [p1, p2] is the
    range of interest. Returns the mass of a Beta(x + 1, n - x + 1)
    distribution between p1 and p2.

    Raises ValueError if n is not a positive integer, x is not a
    non-negative integer, x > n, p1 or p2 is not a float in [0, 1], or
    p2 <= p1.
    """
    if not isinstance(n, int) or n <= 0:
        raise ValueError("n must be a positive integer")
    if not isinstance(x, int) or x < 0:
        raise ValueError(
            "x must be an integer that is greater than or equal to 0")
    if x > n:
        raise ValueError('x cannot be greater than n')
    if not isinstance(p1, float) or p1 < 0 or p1 > 1:
        raise ValueError("p1 must be a float in the range [0, 1]")
    if not isinstance(p2, float) or p2 < 0 or p2 > 1:
        raise ValueError("p2 must be a float in the range [0, 1]")
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # betainc is the beta CDF; it replaces special.btdtr, which was
    # deprecated in SciPy 1.12 and is absent from later releases.
    cdf_low, cdf_high = (
        special.betainc(x + 1, n - x + 1, bound) for bound in (p1, p2)
    )
    return cdf_high - cdf_low
def efficient_pearsonr(a, b):
    """
    Computes correlation of matching columns in `a` and `b`

    Parameters
    ----------
    a,b : array_like
        Sample observations. These arrays must have the same length and
        either an equivalent number of columns or be broadcastable

    Returns
    -------
    corr : float or numpy.ndarray
        Pearson's correlation coefficient between matching columns of
        inputs
    pval : float or numpy.ndarray
        Two-tailed p-values

    Raises
    ------
    ValueError
        If `a` and `b` do not have the same length

    Examples
    --------
    >>> from netneurotools import datasets, stats

    Generate some not-very-correlated and some highly-correlated data:

    >>> np.random.seed(12345678)  # set random seed for reproducible results
    >>> x1, y1 = datasets.make_correlated_xy(corr=0.1, size=100)
    >>> x2, y2 = datasets.make_correlated_xy(corr=0.8, size=100)

    Calculate both correlations simultaneously:

    >>> x = np.column_stack((x1, x2))
    >>> y = np.column_stack((y1, y2))
    >>> stats.efficient_pearsonr(x, y)
    (array([0.10032565, 0.79961189]), array([3.20636135e-01, 1.97429944e-23]))
    """
    a, b, _ = _chk2_asarray(a, b, 0)
    if len(a) != len(b):
        raise ValueError('Provided arrays do not have same length')
    if a.size == 0 or b.size == 0:
        return np.nan, np.nan

    # Work on 2D (observations x columns) views of both inputs.
    n_obs = len(a)
    a, b = a.reshape(n_obs, -1), b.reshape(len(b), -1)
    if a.shape[1] != b.shape[1]:
        a, b = np.broadcast_arrays(a, b)

    with np.errstate(invalid='ignore'):
        corr = sstats.zscore(a, ddof=1) * sstats.zscore(b, ddof=1)
        corr = np.sum(corr, axis=0) / (len(a) - 1)
        # Dividing by 1 turns a 0-d array into a NumPy scalar.
        corr = np.squeeze(np.clip(corr, -1, 1)) / 1

    # taken from scipy.stats: the p-value comes from a symmetric beta
    # distribution. betainc is its CDF and replaces special.btdtr
    # (deprecated in SciPy 1.12, absent from later releases).
    ab = (len(a) / 2) - 1
    prob = 2 * special.betainc(ab, ab, 0.5 * (1 - np.abs(corr)))

    return corr, prob
def posterior(x, n, p1, p2):
    """
    Returns: the posterior probability that p is within the range
    [p1, p2] given x and n

    x is the number of successes out of n observations. x is treated as
    binomially distributed, and the beta/binomial relation gives a
    Beta(x + 1, n - x + 1) posterior for p.

    Raises: ValueError if n is not a positive int, x is not a non-negative
    int, x > n, p1 or p2 is not a float in [0, 1], or p2 <= p1
    """
    if type(n) is not int or n <= 0:
        raise ValueError("n must be a positive integer")
    if type(x) is not int or x < 0:
        raise ValueError(
            "x must be an integer that is greater than or equal to 0")
    if x > n:
        raise ValueError("x cannot be greater than n")
    if type(p1) is not float or p1 < 0 or p1 > 1:
        raise ValueError("p1 must be a float in the range [0, 1]")
    if type(p2) is not float or p2 < 0 or p2 > 1:
        raise ValueError("p2 must be a float in the range [0, 1]")
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # special.betainc(a, b, u) is the cumulative distribution function of
    # the beta distribution: the integral from zero to u of the beta
    # probability density function.
    #   a: shape parameter (a > 0)
    #   b: shape parameter (b > 0)
    #   u: upper limit of integration, in [0, 1]
    # It replaces special.btdtr, which was deprecated in SciPy 1.12 and is
    # absent from later releases.
    a = x + 1
    b = n - x + 1
    cdf_lower, cdf_upper = special.betainc(a, b, (p1, p2))
    return cdf_upper - cdf_lower
def posterior(x, n, p1, p2):
    """
    Posterior probability that p is within [p1, p2] given x and n.

    * x is the number of patients that develop severe side effects
    * n is the total number of patients observed
    * p1 is the lower bound on the range
    * p2 is the upper bound on the range
    * The prior beliefs of p are assumed to follow a uniform distribution
    * Raises ValueError("n must be a positive integer") if n is not a
      positive integer
    * Raises ValueError("x must be an integer that is greater than or
      equal to 0") if x is not an integer that is greater than or equal
      to 0
    * Raises ValueError("x cannot be greater than n") if x is greater
      than n
    * Raises ValueError("{p} must be a float in the range [0, 1]") if p1
      or p2 is not a float within the range [0, 1], where {p} is the
      corresponding variable
    * Raises ValueError("p2 must be greater than p1") if p2 <= p1
    * Returns: the posterior probability that p is within the range
      [p1, p2] given x and n
    """
    if type(n) is not int or n <= 0:
        raise ValueError("n must be a positive integer")
    if type(x) is not int or x < 0:
        raise ValueError(
            "x must be an integer that is greater than or equal to 0")
    if x > n:
        raise ValueError("x cannot be greater than n")
    for label, value in (("p1", p1), ("p2", p2)):
        if type(value) is not float or value < 0 or value > 1:
            raise ValueError(label + " must be a float in the range [0, 1]")
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # Shape parameters of the beta posterior. betainc is its CDF and
    # replaces special.btdtr (deprecated in SciPy 1.12, absent from later
    # releases).
    shape_alpha = x + 1
    shape_beta = n - x + 1
    mass_below_p1 = special.betainc(shape_alpha, shape_beta, p1)
    mass_below_p2 = special.betainc(shape_alpha, shape_beta, p2)
    return mass_below_p2 - mass_below_p1
def posterior(x, n, p1, p2):
    """
    posterior probability function

    Returns the mass of a Beta(x + 1, n - x + 1) distribution between p1
    and p2, where x is the number of successes out of n observations.

    Raises ValueError if n is not a positive int, x is not a non-negative
    int, x > n, p1 or p2 is not a float in [0, 1], or p2 <= p1.
    """
    if type(n) is not int or n < 1:
        raise ValueError("n must be a positive integer")
    if type(x) is not int or x < 0:
        raise ValueError(
            "x must be an integer that is greater than or equal to 0")
    if x > n:
        raise ValueError("x cannot be greater than n")
    if not isinstance(p1, float) or p1 < 0 or p1 > 1:
        raise ValueError("p1 must be a float in the range [0, 1]")
    if not isinstance(p2, float) or p2 < 0 or p2 > 1:
        raise ValueError("p2 must be a float in the range [0, 1]")
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # betainc is the beta CDF; it replaces special.btdtr, which was
    # deprecated in SciPy 1.12 and is absent from later releases.
    successes_plus_one = x + 1
    failures_plus_one = n - x + 1
    below_p1 = special.betainc(successes_plus_one, failures_plus_one, p1)
    below_p2 = special.betainc(successes_plus_one, failures_plus_one, p2)
    return below_p2 - below_p1
def posterior(x, n, p1, p2):
    """Posterior probability that p lies in [p1, p2] given x and n.

    x is the number of successes out of n observations; the posterior used
    is Beta(x + 1, n - x + 1) and the result is the difference of its CDF
    at p2 and at p1.

    Raises ValueError if n is not a positive int, x is not a non-negative
    int, x > n, p1 or p2 is not a float in [0, 1], or p2 <= p1.
    """
    if type(n) is not int or n < 1:
        raise ValueError("n must be a positive integer")
    if type(x) is not int or x < 0:
        raise ValueError(
            "x must be an integer that is greater than or equal to 0")
    if x > n:
        raise ValueError("x cannot be greater than n")
    # The type test must come first: comparing a non-numeric value (e.g. a
    # str) with 0 raises TypeError before the ValueError can be reported.
    if type(p1) is not float or p1 < 0 or p1 > 1:
        raise ValueError("p1 must be a float in the range [0, 1]")
    if type(p2) is not float or p2 < 0 or p2 > 1:
        raise ValueError("p2 must be a float in the range [0, 1]")
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # betainc is the beta CDF; it replaces special.btdtr, which was
    # deprecated in SciPy 1.12 and is absent from later releases.
    shape_a = x + 1
    shape_b = (n - x) + 1
    upper_integral = special.betainc(shape_a, shape_b, p2)
    lower_integral = special.betainc(shape_a, shape_b, p1)
    return upper_integral - lower_integral
def posterior(x, n, p1, p2):
    """Function that calculates the posterior probability that the
    probability of developing severe side effects falls within a specific
    range given the data

    x is the number of affected patients out of n observed; [p1, p2] is the
    range. The result is the mass of a Beta(x + 1, n - x + 1) distribution
    between p1 and p2.

    Raises ValueError if n is not a positive int, x is not a non-negative
    int, x > n, p1 or p2 is not a float in [0, 1], or p2 <= p1.
    """
    if type(n) is not int or n < 1:
        raise ValueError("n must be a positive integer")
    if type(x) is not int or x < 0:
        raise ValueError(
            "x must be an integer that is greater than or equal to 0")
    if x > n:
        raise ValueError("x cannot be greater than n")
    if not isinstance(p1, float) or p1 < 0 or p1 > 1:
        raise ValueError("p1 must be a float in the range [0, 1]")
    if not isinstance(p2, float) or p2 < 0 or p2 > 1:
        raise ValueError("p2 must be a float in the range [0, 1]")
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # betainc is the beta CDF; it replaces special.btdtr, which was
    # deprecated in SciPy 1.12 and is absent from later releases.
    def cumulative(bound):
        return special.betainc(x + 1, n - x + 1, bound)

    accumulated_low = cumulative(p1)
    accumulated_high = cumulative(p2)
    return accumulated_high - accumulated_low
def posterior(x, n, p1, p2):
    """
    Returns: the posterior probability that p is within the range
    [p1, p2] given x and n

    x is the number of successes out of n observations; the posterior used
    is Beta(x + 1, n - x + 1).

    Raises: ValueError if n is not a positive int, x is not a non-negative
    int, x > n, p1 or p2 is not a float in [0, 1], or p2 <= p1
    """
    if type(n) is not int or n <= 0:
        raise ValueError('n must be a positive integer')
    if type(x) is not int or x < 0:
        raise ValueError(
            'x must be an integer that is greater than or equal to 0')
    if x > n:
        raise ValueError('x cannot be greater than n')
    if type(p1) is not float or p1 < 0 or p1 > 1:
        raise ValueError('p1 must be a float in the range [0, 1]')
    if type(p2) is not float or p2 < 0 or p2 > 1:
        raise ValueError('p2 must be a float in the range [0, 1]')
    if p2 <= p1:
        raise ValueError('p2 must be greater than p1')

    # betainc is the beta CDF; it replaces special.btdtr, which was
    # deprecated in SciPy 1.12 and is absent from later releases.
    alpha_shape, beta_shape = x + 1, n - x + 1
    cdf_upper = special.betainc(alpha_shape, beta_shape, p2)
    cdf_lower = special.betainc(alpha_shape, beta_shape, p1)
    return cdf_upper - cdf_lower
def _consensus_finder(self):
    """
    Classify the current yes/no vote tally into a consensus zone.

    key:
        0 - no consensus
        1 - unAmbigous Zone
        2 - medium ambiguity Zone
        3 - high ambiguity zone
        4 - most ambiguity

    Reads ``self.predicate``, ``self.num_yes``, ``self.num_no`` and
    ``self.value``, plus the module-level ``toggles.BAYES_ENABLED`` switch.
    """
    # betainc is the beta CDF; it replaces btdtr, which was deprecated in
    # SciPy 1.12 and is absent from later releases.
    from scipy.special import betainc

    myPred = self.predicate
    votes_cast = self.num_yes + self.num_no
    larger = max(self.num_yes, self.num_no)
    smaller = min(self.num_yes, self.num_no)

    # Default of 2 applies when the Bayesian estimate is disabled.
    uncertLevel = 2
    if toggles.BAYES_ENABLED:
        # Beta CDF at the decision threshold; the shape parameters are
        # swapped depending on the sign of self.value.
        if self.value > 0:
            uncertLevel = betainc(self.num_yes + 1, self.num_no + 1,
                                  myPred.consensus_decision_threshold)
        else:
            uncertLevel = betainc(self.num_no + 1, self.num_yes + 1,
                                  myPred.consensus_decision_threshold)

    if votes_cast >= myPred.consensus_max:
        return 4
    if uncertLevel < myPred.consensus_uncertainty_threshold:
        return 1
    if larger < myPred.consensus_max_single:
        return 0

    # The majority side has reached consensus_max_single: grade the
    # ambiguity by how large the minority side is.
    # TODO un-hard-code this part
    if smaller < myPred.consensus_max_single * (1.0 / 3.0):
        return 1
    if smaller < myPred.consensus_max_single * (2.0 / 3.0):
        return 2
    return 3
def pearson_significance(row):
    """Return the two-sided p-value for the correlation stored in ``row``.

    ``row`` is indexed with the keys ``"corr"`` (correlation value) and
    ``"n"`` (used as the sample size). A correlation of exactly 0 returns 1.
    Otherwise the p-value is twice the CDF of a symmetric beta distribution
    with shape ``n / 2 - 1`` evaluated at ``0.5 * (1 - |corr|)``, floored at
    the smallest positive normal float64 so it never rounds to 0.
    """
    corr = row["corr"]
    if corr == 0:
        return 1

    shape = row["n"] / 2 - 1
    # betainc is the beta CDF; it replaces special.btdtr, which was
    # deprecated in SciPy 1.12 and is absent from later releases.
    p_value = 2 * special.betainc(shape, shape, 0.5 * (1 - abs(corr)))
    # account for small p-values rounding to 0
    return max(np.finfo(np.float64).tiny, p_value)
def posterior(x, n, p1, p2):
    """
    Returns the posterior probability that p is within the range
    [p1, p2] given x and n

    x is the number of successes out of n observations; the posterior used
    is Beta(x + 1, n - x + 1).

    Raises ValueError if n is not a positive int, x is not a non-negative
    int, x > n, p1 or p2 is not a float in [0, 1], or p2 <= p1.
    """
    if type(n) is not int or n < 1:
        raise ValueError("n must be a positive integer")
    if type(x) is not int or x < 0:
        raise ValueError(
            "x must be an integer that is greater than or equal to 0")
    if x > n:
        raise ValueError("x cannot be greater than n")
    if type(p1) is not float or p1 < 0 or p1 > 1:
        raise ValueError("p1 must be a float in the range [0, 1]")
    if type(p2) is not float or p2 < 0 or p2 > 1:
        raise ValueError("p2 must be a float in the range [0, 1]")
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # betainc is the beta CDF; it replaces special.btdtr, which was
    # deprecated in SciPy 1.12 and is absent from later releases.
    shape_a = x + 1
    shape_b = n - x + 1
    # These are CDF values at the two bounds (not the shape parameters).
    cdf_at_upper = special.betainc(shape_a, shape_b, p2)
    cdf_at_lower = special.betainc(shape_a, shape_b, p1)
    return cdf_at_upper - cdf_at_lower
def product_moment_corr(x, y):
    """
    Product-moment correlation for two ndarrays x, y

    Returns a tuple ``(r, prob)``: ``r`` and the sample size come from
    ``_product_moment_corr(x, y)``, and ``prob`` is the two-sided p-value
    of ``r`` computed from a symmetric beta distribution.
    """
    r, n = _product_moment_corr(x, y)

    # From scipy.stats.pearsonr:
    # As explained in the docstring, the p-value can be computed as
    #     p = 2*dist.cdf(-abs(r))
    # where dist is the beta distribution on [-1, 1] with shape parameters
    # a = b = n/2 - 1. The regularized incomplete beta function is the CDF
    # for the beta distribution on [0, 1]. To use it, we make the
    # transformation x = (r + 1)/2; the shape parameters do not change.
    # Then -abs(r) used in `cdf(-abs(r))` becomes
    # x = (-abs(r) + 1)/2 = 0.5*(1 - abs(r)). (r is cast to float64 to
    # avoid a TypeError from the special function when r is higher
    # precision.)
    #
    # betainc replaces btdtr, which was deprecated in SciPy 1.12 and is
    # absent from later releases.
    from scipy.special import betainc

    ab = n / 2 - 1
    prob = 2 * betainc(ab, ab, 0.5 * (1 - abs(np.float64(r))))
    return r, prob
def posterior(x, n, p1, p2):
    """calculates the posterior probability that the probability of
    developing severe side effects falls within a specific range
    given the data

    Args:
        x is the number of patients that develop severe side effects
        n is the total number of patients observed
        p1 is the lower bound on the range
        p2 is the upper bound on the range

    Important: it is assumed the prior beliefs of p follow a uniform
        distribution

    Returns: the posterior probability that p is within the range
        [p1, p2] given x and n

    Raises: ValueError if n is not a positive int, x is not a non-negative
        int, x > n, p1 or p2 is not a float in [0, 1], or p2 <= p1
    """
    if type(n) is not int or n <= 0:
        raise ValueError("n must be a positive integer")
    if type(x) is not int or x < 0:
        raise ValueError("x must be an integer that is greater " +
                         "than or equal to 0")
    if x > n:
        raise ValueError("x cannot be greater than n")
    if type(p1) is not float or p1 < 0 or p1 > 1:
        raise ValueError("p1 must be a float in the range [0, 1]")
    if type(p2) is not float or p2 < 0 or p2 > 1:
        raise ValueError("p2 must be a float in the range [0, 1]")
    if p2 <= p1:
        raise ValueError("p2 must be greater than p1")

    # Posterior is Beta(x + 1, n - x + 1). betainc is its CDF and replaces
    # special.btdtr (deprecated in SciPy 1.12, absent from later releases).
    shape_a, shape_b = x + 1, n - x + 1
    cdf_p1 = special.betainc(shape_a, shape_b, p1)
    cdf_p2 = special.betainc(shape_a, shape_b, p2)
    return cdf_p2 - cdf_p1
def _cdf(self, x, a, b): return special.btdtr(a, b, x)
def _cdf(self, x):
    """Return the beta CDF at ``x`` using shape parameters read from disk.

    The two shape parameters are loaded from the comma-delimited file
    ``distr_par.txt`` inside ``FILE_DIR`` on every call.

    Uses ``special.betainc`` (the regularized incomplete beta function),
    which replaces ``special.btdtr``: that function was deprecated in
    SciPy 1.12 and is absent from later releases.
    """
    params_path = os.path.join(FILE_DIR, 'distr_par.txt')
    a, b = np.loadtxt(params_path, delimiter=',')
    return special.betainc(a, b, x)