Example #1
def make_negative_binom_density(r,
                                p,
                                w,
                                size_of_counts,
                                left_most,
                                for_plot=False):
    negative_binom_density_array = np.zeros(size_of_counts + 1,
                                            dtype=np.float128)
    dist1 = st.nbinom(r, p)
    f1 = dist1.pmf
    cdf1 = dist1.cdf
    dist2 = st.nbinom(r, 1 - p)
    f2 = dist2.pmf
    cdf2 = dist2.cdf
    negative_binom_norm = (cdf1(size_of_counts) - cdf1(left_most - 1)) * w + \
                          (cdf2(size_of_counts) - cdf2(left_most - 1)) * (1 - w)
    plot_norm = (cdf1(size_of_counts) - cdf1(4)) * w + \
                (cdf2(size_of_counts) - cdf2(4)) * (1 - w)
    for k in range(5, size_of_counts + 1):
        if for_plot:
            negative_binom_density_array[k] = (w * f1(k) +
                                               (1 - w) * f2(k)) / plot_norm
        else:
            negative_binom_density_array[k] = (
                w * f1(k) + (1 - w) * f2(k)) / negative_binom_norm
    return negative_binom_density_array
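A minimal usage sketch for the function above, assuming the source project's module-level imports (numpy as np, scipy.stats as st); the parameter values are illustrative, not the project's.

import numpy as np
import scipy.stats as st

# Illustrative parameters: a mixture of NB(r, p) and NB(r, 1 - p) with weight w,
# truncated to [left_most, size_of_counts].
r, p, w = 10, 0.4, 0.6
density = make_negative_binom_density(r, p, w, size_of_counts=100, left_most=5)
print(density[5:].sum())  # ~1.0, since the truncated mixture is renormalized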
Example #2
 def bag_size_gen(self, num_bags, random_state, max_pts=None):
     if self.size_type == 'uniform':
         lo, hi = self.bag_sizes
         sizes = random_state.randint(low=lo, high=hi + 1, size=num_bags)
     elif self.size_type == 'neg-binom':
         # Do a negative binomial + 1 (in Wikipedia's notation),
         # so that sizes are a distribution on the positive integers.
         # mean = p r / (1 - p) + 1; var = (mean - 1) / (1 - p)
         mean, std = self.bag_sizes
         p = 1 - (mean - 1) / (std * std)
         assert 0 < p < 1
         r = (mean - 1) * (1 - p) / p
         assert r > 0
         # scipy swaps p and 1-p
         sizes = []
         if max_pts is not None:
             while max_pts > 0:
                 size_bag = stats.nbinom(r, 1 - p).rvs(
                     size=1, random_state=random_state)[0] + 1
                 max_pts = max_pts - size_bag
                 if max_pts >= 0:
                     sizes.append(size_bag)
                 else:
                     sizes.append(max_pts + size_bag)
         else:
             sizes = stats.nbinom(r, 1 - p).rvs(
                 size=num_bags, random_state=random_state) + 1
     else:
         raise ValueError("unknown size_type {}".format(self.size_type))
     return sizes
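A quick check of the (mean, std) -> (r, p) conversion used in the 'neg-binom' branch above, with illustrative numbers (requires std**2 > mean - 1 > 0):

from scipy import stats

mean, std = 20.0, 6.0
p = 1 - (mean - 1) / (std * std)
r = (mean - 1) * (1 - p) / p
d = stats.nbinom(r, 1 - p)      # scipy swaps p and 1 - p
print(d.mean() + 1)             # ~20.0, the requested mean of the shifted distribution
print(d.var())                  # ~36.0, i.e. std**2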
Example #3
def neg_binomial_3(Neg_Binom_Params):
    u = Neg_Binom_Params.valuesdict()
    model_value = (u['coef1']) * scs.nbinom(u['n1'], u['p1']).pmf(vals) + (
        u['coef2']) * scs.nbinom(u['n2'], u['p2']).pmf(vals) + (
            1 - u['coef1'] - u['coef2']) * scs.nbinom(u['n3'],
                                                      u['p3']).pmf(vals)
    residuals = model_value - actual
    return (residuals)
Example #4
    def computeSumOfDensities(self, pBackgroundModel, pArgs, pXfoldMaxValue=None):
        background_nbinom = {}
        background_sum_of_densities_dict = {}
        max_value = 0

        fixateRange = int(pArgs.fixateRange)
        for distance in pBackgroundModel:
            max_value_distance = int(pBackgroundModel[distance][2])
            if max_value < int(pBackgroundModel[distance][2]):
                max_value = int(pBackgroundModel[distance][2])

            if pXfoldMaxValue is not None:
                max_value_distance *= pXfoldMaxValue

            if -fixateRange < distance < fixateRange:
                background_nbinom[distance] = nbinom(pBackgroundModel[distance][0], pBackgroundModel[distance][1])

                sum_of_densities = np.zeros(max_value_distance)
                for j in range(max_value_distance):
                    if j >= 1:
                        sum_of_densities[j] += sum_of_densities[j - 1]
                    sum_of_densities[j] += background_nbinom[distance].pmf(j)

                background_sum_of_densities_dict[distance] = sum_of_densities

        background_nbinom[fixateRange] = nbinom(pBackgroundModel[fixateRange][0], pBackgroundModel[fixateRange][1])

        sum_of_densities = np.zeros(max_value)
        for j in range(max_value):
            if j >= 1:
                sum_of_densities[j] += sum_of_densities[j - 1]
            sum_of_densities[j] += background_nbinom[fixateRange].pmf(j)

        background_sum_of_densities_dict[fixateRange] = sum_of_densities
        background_nbinom[-fixateRange] = nbinom(pBackgroundModel[-fixateRange][0], pBackgroundModel[-fixateRange][1])

        sum_of_densities = np.zeros(max_value)
        for j in range(max_value):
            if j >= 1:
                sum_of_densities[j] += sum_of_densities[j - 1]
            sum_of_densities[j] += background_nbinom[-fixateRange].pmf(j)

        background_sum_of_densities_dict[-fixateRange] = sum_of_densities

        min_key = min(background_sum_of_densities_dict)
        max_key = max(background_sum_of_densities_dict)

        for key in pBackgroundModel.keys():
            if key in background_sum_of_densities_dict:
                continue
            if key < min_key:
                background_sum_of_densities_dict[key] = background_sum_of_densities_dict[min_key]
            elif key > max_key:
                background_sum_of_densities_dict[key] = background_sum_of_densities_dict[max_key]

        return background_sum_of_densities_dict
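The cumulative loops above simply tabulate the negative binomial CDF; a small equivalence sketch with illustrative parameters:

import numpy as np
from scipy.stats import nbinom

d = nbinom(5.0, 0.3)
sums = np.cumsum(d.pmf(np.arange(50)))   # same accumulation as the loops above
print(np.allclose(sums, d.cdf(np.arange(50))))  # True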
Example #5
    def get_probability_density_func(self):
        """
        Calculates the probabilities for the NegativeBinomial x_values.
        """
        dist = nbinom(n=self.r, p=self.p)

        self.probabilities = dist.pmf(self.x_values)
Example #6
 def _reset_distribution(self):
     """
     https://stackoverflow.com/questions/40846992/
     alternative-parametrization-of-the-negative-binomial-in-scipy
     #comment109394209_47406400
     """
     self._distribution: rv_discrete = nbinom(self._r, 1 - self._p)
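A short sketch of the parameterization point the linked answer makes: if p is the count ("failure") probability in the Wikipedia convention, scipy expects its complement (illustrative values):

from scipy.stats import nbinom

r, p = 5.0, 0.3
d = nbinom(r, 1 - p)               # scipy's second argument is the success probability
print(d.mean(), r * p / (1 - p))   # both ~2.14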
Example #7
def print_stats(seqs):
    lens = get_lengths(seqs)
    m, v, p, r = dist_parameters(lens)
    print("mean\tvariance\tmedian\tr\tp")
    print("\t".join(map(str, [m, v, scipy.median(lens), r, p])))
    
    return lens, stats.nbinom(r, 1-p).pmf
Example #8
def N_test_neg_binom(
    num_obs_events: int,
    rupture_rate: float,
    prob_success: float,
    r_dispersion: float,
    conf_interval: float,
) -> dict:

    if r_dispersion < 1:
        logging.warning("Earthquake production temporally underdispersed, \n"
                     "switching to Poisson N-Test")
        return N_test_poisson(num_obs_events, rupture_rate, conf_interval)

    conf_min, conf_max = nbinom(r_dispersion,
                                prob_success).interval(conf_interval)
    test_pass = conf_min <= num_obs_events <= conf_max

    test_res = "Pass" if test_pass else "Fail"
    logging.info(f"N-Test: {test_res}")

    test_result = {
        "conf_interval_pct": conf_interval,
        "conf_interval": (conf_min, conf_max),
        "inv_time_rate": rupture_rate,
        "n_obs_earthquakes": num_obs_events,
        "test_res": test_res,
        "test_pass": bool(test_pass),
    }

    return test_result
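A hypothetical call with illustrative numbers, assuming the source module's imports (logging, scipy.stats.nbinom) are in scope:

result = N_test_neg_binom(num_obs_events=8, rupture_rate=10.0, prob_success=0.5,
                          r_dispersion=10.0, conf_interval=0.95)
print(result["conf_interval"], result["test_res"])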
Example #9
def test_zig_cdf():
  np.random.seed(0)
  x = st.nbinom(n=10, p=.1).rvs(size=100)
  Fx = scmodes.benchmark.gof._zig_cdf(x, size=1, log_mu=-5, log_phi=-1, logodds=-3)
  assert Fx.shape == x.shape
  assert (Fx >= 0).all()
  assert (Fx <= 1).all()
Example #10
def _shannon_entropy(col, alpha, beta):

	if not isinstance(col, (np.ndarray,)):
		col = np.array(col, dtype = float)

	if len(alpha) != len(beta):
		raise ValueError("alpha and beta must have the same length")

	priors = []
	for i in range(0, len(alpha)):
		priors.append( sp.nbinom(alpha[i], beta[i]) )

	col = np.sort(np.around(col))
	if col.shape[0] == 1:
		return 0.0
	else:
		## apply prior scaling
		weights = np.zeros_like(col, dtype = float)
		for i in range(0, col.shape[0]):
			weights[i] = np.max([ p.pmf(col[i]) for p in priors ])
		#print col
		#print weights
		freq = col * weights
		freq = freq/np.sum(freq)
		#print freq
		H = -1 * freq * np.log2(freq)
		#print H
		return np.nansum(H)
Example #11
 def nb_iter(n,p):
     yield 0.0
     nb = nbinom(n,p)
     for i in count():
         pr = nb.pmf(i)
         if pr<1e-5: break
         yield pr
Example #12
def _shannon_entropy(col, alpha, beta):

    if not isinstance(col, (np.ndarray, )):
        col = np.array(col, dtype=float)

    if len(alpha) != len(beta):
        raise ValueError("alpha and beta must have the same length")

    priors = []
    for i in range(0, len(alpha)):
        priors.append(sp.nbinom(alpha[i], beta[i]))

    col = np.sort(np.around(col))
    if col.shape[0] == 1:
        return 0.0
    else:
        ## apply prior scaling
        weights = np.zeros_like(col, dtype=float)
        for i in range(0, col.shape[0]):
            weights[i] = np.max([p.pmf(col[i]) for p in priors])
        #print col
        #print weights
        freq = col * weights
        freq = freq / np.sum(freq)
        #print freq
        H = -1 * freq * np.log2(freq)
        #print H
        return np.nansum(H)
Example #13
def plot_negbinomial_fit(data,
                         fit_results,
                         title=None,
                         x_label=None,
                         x_range=None,
                         y_range=None,
                         fig_size=(6, 5),
                         bin_width=1,
                         filename=None):
    """
    :param data: (numpy.array) observations
    :param fit_results: dictionary with keys "n", "p" and "loc"
    :param title: title of the figure
    :param x_label: label to show on the x-axis of the histogram
    :param x_range: (tuple) x range
    :param y_range: (tuple) y range
        (the histogram shows the probability density so the upper value of y_range should be 1).
    :param fig_size: (tuple) figure size
    :param bin_width: bin width
    :param filename: filename to save the figure as
    """

    plot_fit_discrete(data=data,
                      dist=stat.nbinom(n=fit_results['n'],
                                       p=fit_results['p'],
                                       loc=fit_results['loc']),
                      label='Negative Binomial',
                      bin_width=bin_width,
                      title=title,
                      x_label=x_label,
                      x_range=x_range,
                      y_range=y_range,
                      fig_size=fig_size,
                      filename=filename)
Example #14
def rpp(x, log_mu, log_phi, logodds, size, onehot, n_samples=1):
    # Important: these are n x 1
    n = onehot.dot(np.exp(-log_phi))
    pi0 = onehot.dot(sp.expit(-logodds))
    p = 1 / (1 + (size * onehot.dot(np.exp(log_mu + log_phi))))

    cdf = st.nbinom(n=n, p=p).cdf(x - 1)
    # Important: this excludes the right endpoint, so we need to special-case x = 0
    cdf = np.where(x > 0, pi0 + (1 - pi0) * cdf, cdf)
    pmf = st.nbinom(n=n, p=p).pmf(x)
    pmf *= (1 - pi0)
    pmf[x == 0] += pi0[x == 0]
    u = np.random.uniform(size=(n_samples, x.shape[0]))
    # cdf and pmf are n x 1
    rpp = cdf.ravel() + u * pmf.ravel()
    return rpp
Example #15
 def predict(self, size=100):
     b, alpha, phi = self.theta_opt
     a = b * self.mu 
     p = b / (1 + b)
     rv = nbinom(a, p).rvs(size=size)
     y50 = np.mean(rv)
     y25 = np.quantile(rv, 0.25)
     y90 = np.quantile(rv, 0.9)
     return y50, y25, y90
Example #16
 def choose(self):
     self.name = "Neg-binomial"
     if self.user_class == 'HF':
         peak_hours_for_number_of_requests_hf = [
             10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
         ]
         if self.hour in peak_hours_for_number_of_requests_hf:
             nbinom_n_size, nbinom_mu_mean = 0.470368548315641, 34.7861725808564
         else:
             nbinom_n_size, nbinom_mu_mean = .143761308534382, 14.158264589062
     elif self.user_class == 'HO':
         peak_hours_for_number_of_requests_ho = [
             10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
         ]
         if self.hour in peak_hours_for_number_of_requests_ho:
             nbinom_n_size, nbinom_mu_mean = 0.113993444740046, 1.04026982546095
         else:
             nbinom_n_size, nbinom_mu_mean = 0.0448640346452827, 0.366034837767499
     elif self.user_class == 'MF':
         peak_hours_for_number_of_requests_mf = [
             8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
         ]
         if self.hour in peak_hours_for_number_of_requests_mf:
             nbinom_n_size, nbinom_mu_mean = 0.758889839349924, 4.83390315655562
         else:
             nbinom_n_size, nbinom_mu_mean = 0.314653746175354, 3.22861572712093
     elif self.user_class == 'MO':
         peak_hours_for_number_of_requests_mo = [
             8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22
         ]
         if self.hour in peak_hours_for_number_of_requests_mo:
             nbinom_n_size, nbinom_mu_mean = 0.177211316065872, 0.406726610288464
         else:
             nbinom_n_size, nbinom_mu_mean = 0.0536955764781434, 0.124289074773539
     elif self.user_class == 'LF':
         peak_hours_for_number_of_requests_lf = [
             8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
         ]
         if self.hour in peak_hours_for_number_of_requests_lf:
             nbinom_n_size, nbinom_mu_mean = 0.480203280455517, 0.978733578849008
         else:
             nbinom_n_size, nbinom_mu_mean = 0.240591506072217, 0.487956906502501
     elif self.user_class == 'LO':
         peak_hours_for_number_of_requests_lo = [
             8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22
         ]
         if self.hour in peak_hours_for_number_of_requests_lo:
             nbinom_n_size, nbinom_mu_mean = 0.188551092877969, 0.111187768162793
         else:
             nbinom_n_size, nbinom_mu_mean = 0.0810585648991726, 0.0405013083716073
     else:
         raise Exception('The user class %s does not exist' %
                         self.user_class)
     # From R's documentation: An alternative parametrization (often used in ecology) is by the
     #  _mean_ 'mu', and 'size', the _dispersion parameter_, where 'prob' = 'size/(size+mu)'
     nbinom_prob = nbinom_n_size / (nbinom_n_size + nbinom_mu_mean)
     return nbinom(nbinom_n_size, nbinom_prob)
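A check of the R-style (size, mu) -> prob conversion quoted in the comment above, using the HF peak-hour parameters from the snippet:

from scipy.stats import nbinom

nbinom_n_size, nbinom_mu_mean = 0.470368548315641, 34.7861725808564
nbinom_prob = nbinom_n_size / (nbinom_n_size + nbinom_mu_mean)
print(nbinom(nbinom_n_size, nbinom_prob).mean())  # ~34.79, i.e. the requested mean mu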
Example #17
def generate_onset_to_reporting_distribution_brauner():
    """
    Build onset-to-reporting distribution
    """
    # Distribution used by [Brauner et al., 2020]
    mu = 5.25
    alpha = 1.57
    distrb = nbinom(n=1 / alpha, p=1 - alpha * mu / (1 + alpha * mu))
    x = range(int(distrb.ppf(1 - 1e-6)))
    return distrb.pmf(x)
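A quick check that the (mu, alpha) parameterization above yields a distribution with mean mu:

from scipy.stats import nbinom

mu, alpha = 5.25, 1.57
distrb = nbinom(n=1 / alpha, p=1 - alpha * mu / (1 + alpha * mu))
print(distrb.mean())  # ~5.25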
Example #18
 def test_rvs(self):
     vals = stats.nbinom.rvs(10, 0.75, size=(2, 50))
     assert_(numpy.all(vals >= 0))
     assert_(numpy.shape(vals) == (2, 50))
     assert_(vals.dtype.char in typecodes['AllInteger'])
     val = stats.nbinom.rvs(10, 0.75)
     assert_(isinstance(val, int))
     val = stats.nbinom(10, 0.75).rvs(3)
     assert_(isinstance(val, numpy.ndarray))
     assert_(val.dtype.char in typecodes['AllInteger'])
Example #19
 def test_rvs(self):
     vals = stats.nbinom.rvs(10, 0.75, size=(2, 50))
     assert numpy.all(vals >= 0)
     assert numpy.shape(vals) == (2, 50)
     assert vals.dtype.char in typecodes["AllInteger"]
     val = stats.nbinom.rvs(10, 0.75)
     assert isinstance(val, int)
     val = stats.nbinom(10, 0.75).rvs(3)
     assert isinstance(val, numpy.ndarray)
     assert val.dtype.char in typecodes["AllInteger"]
Example #20
def _ebpm_point_gamma_update(theta, x, s):
    logodds, a, b = theta
    p = sp.expit(logodds)
    nb_lik = st.nbinom(n=a, p=1 / (1 + s / b)).pmf(x)
    z = np.where(x < 1, p * nb_lik / (1 - p + p * nb_lik), 1)
    pm = (x + a) / (s + b)
    plm = sp.digamma(x + a) - np.log(s + b)
    logodds = np.log(z.sum()) - np.log((1 - z).sum() + 1e-16)
    b = a * z.sum() / (z * pm).sum()
    a = _ebpm_point_gamma_update_a(a, z, plm, b)
    return np.array([logodds, a, b])
Example #21
def nbinSim(*ps):
    '''
    # Test negative binomial model.
    # ps[0] - mean of nbinom distribution
    # ps[1] - aggregation k factor.
    '''
    p = ps[0] / (ps[1] + ps[0])
    if p == 0:
        return np.zeros(1000)
    else:
        return stats.nbinom(n=ps[0], p=p).rvs(size=1000)
Example #22
def nbinSim(*ps):
    '''
    # Test negative binomial model.
    # ps[0] - mean of nbinom distribution
    # ps[1] - aggregation k factor.
    '''
    p = ps[0]/(ps[1]+ps[0])
    if p == 0:
        return np.zeros(1000)
    else:
        return stats.nbinom(n=ps[0],p=p).rvs(size=1000)
Example #23
 def test_Geometric_to_NBinom(self):
     exp_list, obs_list = [], []
     X = Geometric(p=0.8)
     sims = X.sim(Nsim)
     simulated = sims.tabulate()
     for k in range(1, 10):
         expected = Nsim * stats.nbinom(n=1, p=0.8).pmf(k - 1)
         if expected > 5:
             exp_list.append(expected)
             obs_list.append(simulated[k])
     pval = stats.chisquare(obs_list, exp_list).pvalue
     self.assertTrue(pval > 0.01)
Example #24
 def test_NBinom_Pascal_additive(self):
     exp_list, obs_list = [], []
     X, Y = RV(Pascal(r=4, p=0.6) * Pascal(r=6, p=0.6))
     sims = (X + Y).sim(Nsim)
     simulated = sims.tabulate()
     for k in range(10, 35):
         expected = Nsim * stats.nbinom(n=10, p=0.6).pmf(k)
         if expected > 5:
             exp_list.append(expected)
             obs_list.append(simulated[k])
     pval = stats.chisquare(obs_list, exp_list).pvalue
     self.assertTrue(pval > .01)
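The test above relies on the additivity of negative binomials with a common p: the sum of independent NB(r1, p) and NB(r2, p) is NB(r1 + r2, p). A small numerical sketch, independent of the Pascal convention used by the library:

import numpy as np
from scipy import stats

k = np.arange(50)
# pmf of the sum of two independent draws, via convolution of the individual pmfs
pmf_sum = np.convolve(stats.nbinom(4, 0.6).pmf(k), stats.nbinom(6, 0.6).pmf(k))[:50]
print(np.allclose(pmf_sum, stats.nbinom(10, 0.6).pmf(k), atol=1e-6))  # True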
Example #25
def simulate_nb_gamma():
    np.random.seed(1)
    n = 100
    p = 5
    s = 1e5 * np.ones((n, 1))
    theta = 0.2
    log_mu = np.random.uniform(-12, -6, size=(1, p))
    log_phi = np.random.uniform(-6, 0, size=(1, p))
    G = st.gamma(a=np.exp(-log_phi), scale=np.exp(log_mu + log_phi))
    lam = G.rvs(size=(n, p))
    x = st.nbinom(n=1 / theta, p=1 / (1 + s * lam * theta)).rvs()
    return x, s, log_mu, log_phi, theta
Example #26
    def demand_quantile(self, my_percentile=0):
        if my_percentile == 0:
            my_percentile = self.percentile
        for sku_id in self.sku_list:
            for dc_id in self.dc_list:
                row = self.distribution.loc[
                    (self.distribution.item_sku_id == sku_id)
                    & (self.distribution.dc_id == dc_id)]
                dist_type = row.dist_type.iloc[0]
                para1 = row.para1.astype(float).iloc[0]
                para2 = row.para2.astype(float).iloc[0]
                if dist_type == 'N':
                    ng_bi = sp.nbinom(para1, para2)
                    if dc_id == 0:
                        self.dR_it[sku_id - 1] = np.ceil(
                            ng_bi.ppf(my_percentile))
                    else:
                        self.d_ijt[sku_id - 1, dc_id - 1] = np.ceil(
                            ng_bi.ppf(my_percentile))
                elif dist_type == 'G':
                    g = sp.gamma(para1, scale=para2)
                    if dc_id == 0:
                        self.dR_it[sku_id - 1] = np.ceil(g.ppf(my_percentile))
                    else:
                        self.d_ijt[sku_id - 1,
                                   dc_id - 1] = np.ceil(g.ppf(my_percentile))

        # # Assign mean values as deterministic sku_demand
        # self.dR_it = np.zeros((1000)).astype(int)
        # self.d_ijt = np.zeros((1000, 5)).astype(int)
        # for sku_id in self.sku_list:
        #     for dc_id in self.dc_list:
        #         row = self.distribution.loc[(self.distribution.item_sku_id == sku_id) & (self.distribution.dc_id == dc_id)]
        #         dist_type = row.dist_type.iloc[0]
        #         para1 = row.para1.astype(float).iloc[0]
        #         para2 = row.para2.astype(float).iloc[0]
        #         if dist_type == 'N':
        #             if dc_id == 0:
        #                 self.dR_it[sku_id-1] = para1*(1-para2)/para2
        #             else:
        #                 self.d_ijt[sku_id-1, dc_id-1] = para1*(1-para2)/para2
        #         elif dist_type == 'G':
        #             if dc_id == 0:
        #                 self.dR_it[sku_id-1] = para1*para2
        #             else:
        #                 self.d_ijt[sku_id-1, dc_id-1] = para1*para2
        #         else:
        #             if dc_id == 0:
        #                 print ("No distribution for sku", sku_id, "in RDC")
        #             else:
        #                 print ("No distribution for sku", sku_id, "in FDC", dc_id)
        return (self.dR_it, self.d_ijt)
Example #27
    def _calc_negbinom(self, domain_matrix):
        sigs = []
        means = [np.mean(self.hicmap.diagonal(i)) for i in range(self.hicmap.shape[0])]
        lens = [len(self.hicmap.diagonal(i)) for i in range(self.hicmap.shape[0])]

        def sum_mean(i, j):
            """
            Counts the mean across several consecutive diagonals in the hicmap
            """
            s = sum([m * l for (m, l) in list(zip(means, lens))[i:j]])
            l = sum(lens[i:j])
            try:
                return s / l
            except ZeroDivisionError:
                return 0

        def sum_var(i, j, m):
            """
            Counts the variance in several consecutive diagonals given their mean
            """
            mses = [np.mean((self.hicmap.diagonal(i) - m) ** 2) for i in range(i, j)]
            s = sum([m * l for (m, l) in zip(mses, lens[i:j])])
            l = sum(lens[i:j])
            try:
                return s / l
            except:
                return 0

        pvalue_matrix = np.ones(domain_matrix.shape)

        for i in range(domain_matrix.shape[0]):
            li = self.domains[i][1] - self.domains[i][0] + 1
            for j in range(i + 1, domain_matrix.shape[1]):
                lj = self.domains[j][1] - self.domains[j][0] + 1
                dist = self.domains[j][0] - self.domains[i][1]
                span = self.domains[j][1] - self.domains[i][0] + 1
                expected = sum_mean(dist, span)
                var = sum_var(dist, span, expected)
                mean = expected * li * lj
                if var < mean:
                    var = mean + 1
                k = domain_matrix[i][j]
                r = mean ** 2 / (var - mean) if (var - mean) != 0 else np.nan
                p = (var - mean) / var if var != 0 else np.nan
                model = ss.nbinom(n=r, p=1 - p)
                if expected and k:
                    pval = model.sf(k)
                    pvalue_matrix[i, j] = pval
                    if pval < self.threshold:
                        sigs.append((i, j, pval))

        return sigs, self._fdr_correct(pvalue_matrix, domain_matrix.shape)
Example #28
def update_umi(attr, old, new):
    selected = ind_data.selected['1d']['indices']
    with sqlite3.connect(db) as conn:
        if selected:
            ind = ind_data.data['ind'][selected[0]]
            print("Selected {}, {}".format(ind, gene))
            umi = pd.read_sql(
                """select umi.value, annotation.size from annotation, umi 
        where umi.gene == ? and annotation.chip_id == ? and 
        umi.sample == annotation.sample""",
                con=conn,
                params=(
                    gene,
                    ind,
                ))
            keep = umi['value'] < 19
            edges = np.arange(20)
            counts, _ = np.histogram(umi['value'].values, bins=edges)
            umi_data.data = bokeh.models.ColumnDataSource.from_df(
                pd.DataFrame({
                    'left': edges[:-1],
                    'right': edges[1:],
                    'count': counts
                }))

            params = pd.read_sql(
                'select log_mean, log_disp, logodds from params where gene == ? and ind == ?',
                con=conn,
                params=(gene, ind))
            n = np.exp(params['log_disp'])
            p = 1 / (1 + np.outer(
                umi['size'], np.exp(params['log_mean'] - params['log_disp'])))
            assert (n > 0).all(), 'n must be positive'
            assert (p >= 0).all(), 'p must be non-negative'
            assert (p <= 1).all(), 'p must be <= 1'
            G = st.nbinom(n=n.values.ravel(), p=p.ravel()).pmf
            grid = np.arange(19)
            pmf = np.array([G(x).mean() for x in grid])
            if params.iloc[0]['logodds'] is not None:
                pmf *= sp.expit(-params['logodds']).values
                pmf[0] += sp.expit(params['logodds']).values
            exp_count = umi.shape[0] * pmf
            dist_data.data = bokeh.models.ColumnDataSource.from_df(
                pd.DataFrame({
                    'x': .5 + grid,
                    'y': exp_count
                }))
        else:
            umi_data.data = bokeh.models.ColumnDataSource.from_df(
                pd.DataFrame(columns=['left', 'right', 'count']))
            dist_data.data = bokeh.models.ColumnDataSource.from_df(
                pd.DataFrame(columns=['x', 'y']))
Example #29
 def _consultations(self):
     """
     Calculates the expected number of consultations each day
     """
     self.E = np.zeros(self.Y.size[0])
     self.Cdist = np.empty(self.Y.size[0], dtype=nbinom)
     for i in range(1, self.Y.size[0]):
         #incident cases Z_i = S(i-1)-S(i)
         #expected consultations for Covid 19: E_i = prob * Z_i
         self.E[i] = (self.Y[i - 1, 0] -
                      self.Y[i, 0]) * self.care_probability
         r = pow(self.E[i], self.delta)
         self.Cdist[i] = nbinom(n=r, p=self.E[i] / (r + self.E[i]))
Example #30
def simulate_point_gamma():
    x, s, log_mu, log_phi, _ = _simulate_gamma()
    n, p = x.shape
    logodds = np.random.uniform(-3, -1, size=(1, p))
    pi0 = sp.expit(logodds)
    z = np.random.uniform(size=x.shape) < pi0
    y = np.where(z, 0, x)
    F = st.nbinom(n=np.exp(-log_phi),
                  p=1 / (1 + s.dot(np.exp(log_mu + log_phi))))
    llik_nonzero = np.log(1 - pi0) + F.logpmf(y)
    llik = np.where(y < 1, np.log(pi0 + np.exp(llik_nonzero)),
                    llik_nonzero).sum()
    return y, s, log_mu, log_phi, logodds, llik
Example #31
def test_ebpm_gamma_extrapolate(simulate_gamma):
    x, s, log_mu, log_phi, _ = simulate_gamma
    # Important: log_mu, log_phi are [1, p]. We want oracle log likelihood for
    # only gene 0
    oracle_llik = st.nbinom(
        n=np.exp(-log_phi[0, 0]),
        p=1 / (1 + s.dot(np.exp(log_mu[0, 0] + log_phi[0, 0])))).logpmf(
            x[:, 0]).sum()
    log_mu_hat, neg_log_phi_hat, llik = scmodes.ebpm.ebpm_gamma(
        x[:, 0], s.ravel(), extrapolate=True)
    assert np.isfinite(log_mu_hat)
    assert np.isfinite(neg_log_phi_hat)
    assert llik > oracle_llik
Example #32
    def find_high_lim(self):
        """
        Finds the high interval to use in calculations for the variable basis 
        and univariate norm squared values.
        """
        low_percent = 8e-17
        high_percent = 1 - low_percent

        stand_dist = nbinom(n=self.r, p=self.p)
        high = np.ceil(stand_dist.ppf(high_percent))
        low = np.floor(stand_dist.ppf(low_percent))

        self.x_values = np.arange(low, high + 1)
Example #33
def gridOptimFlanks(fitparams, quantiles=(.05, .5), toquantiles=(.5, .99), thres=.1, p=[0, 1], n=[0, 500]):
    # The central distribution:
    fit = nbinom(fitparams[0], fitparams[1])

    # The squared difference between the quantiles:
    def ssq(n, p, quantiles, toquantiles):
        this_fit = nbinom(n, p)
        return (np.sum([(this_fit.ppf(quantiles[i]) - fit.ppf(toquantiles[i])) ** 2 for i in range(len(quantiles))]))

    previous, this = (np.mean(n), np.mean(p)), (0.001, 0.001)
    N = np.linspace(n[0], n[1], 100)
    P = np.linspace(p[0], p[1], 100)

    iter = 0
    while abs(ssq(previous[0], previous[1], quantiles, toquantiles) - ssq(this[0], this[1], quantiles,
                                                                          toquantiles)) > thres:
        iter += 1
        print(str("Iteration # %s" % str(iter)).ljust(15, " ") + "|", end="")
        previous = this[:]
        dist = np.full((100, 100), np.nan)
        for i, ni in enumerate(N):
            for j, pj in enumerate(P):
                d = ssq(ni, pj, quantiles, toquantiles)
                dist[i, j] = d

        nId, pId = np.where(dist == np.nanmin(dist))
        this = (np.mean(N[nId]), np.mean(P[pId]))
        nMin, nMax = N[[nId[0] - 10 if nId[0] - 10 > 0 else 0]][0], N[[nId[-1] + 10 if nId[-1] + 10 < 100 else 99]][0]
        pMin, pMax = P[[pId[0] - 10 if pId[0] - 10 > 0 else 0]][0], P[[pId[-1] + 10 if pId[-1] + 10 < 100 else 99]][0]
        # Adjust the edges:
        if pMin == min(P):
            pMin = min(P) * 0.8
        if pMin == 0:
            pMin = 0.0001
        if pMax == max(P):
            pMax = max(P) * 1.5
        if pMax > 1:
            pMax = 1
        if nMin == min(N):
            nMin = min(N) * 0.8
        if nMax == max(N):
            nMax = max(N) * 1.2
        N = np.linspace(nMin, nMax, 100)
        P = np.linspace(pMin, pMax, 100)
        print(str(" Current parameter state %s" % str(round(this[0], 3))).ljust(34, " ") + ";", end="")
        print(str(" %s" % str(round(this[1], 3))).ljust(8, " ") + "|", end="")
        print(str(" SSE = %s" % str(round(np.nanmin(dist), 3))).ljust(14, " ") + "|", end="\n")

    return (this)
Example #34
 def choose(self):
     self.name = "Neg-binomial"
     if self.user_class == 'HF':
         peak_hours_for_number_of_requests_hf = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
         if self.hour in peak_hours_for_number_of_requests_hf:
             nbinom_n_size, nbinom_mu_mean = 0.470368548315641, 34.7861725808564
         else:
             nbinom_n_size, nbinom_mu_mean = .143761308534382, 14.158264589062
     elif self.user_class == 'HO':
         peak_hours_for_number_of_requests_ho = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
         if self.hour in peak_hours_for_number_of_requests_ho:
             nbinom_n_size, nbinom_mu_mean = 0.113993444740046, 1.04026982546095
         else:
             nbinom_n_size, nbinom_mu_mean = 0.0448640346452827, 0.366034837767499
     elif self.user_class == 'MF':
         peak_hours_for_number_of_requests_mf = [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
         if self.hour in peak_hours_for_number_of_requests_mf:
             nbinom_n_size, nbinom_mu_mean = 0.758889839349924, 4.83390315655562
         else:
             nbinom_n_size, nbinom_mu_mean = 0.314653746175354, 3.22861572712093
     elif self.user_class == 'MO':
         peak_hours_for_number_of_requests_mo = [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
         if self.hour in peak_hours_for_number_of_requests_mo:
             nbinom_n_size, nbinom_mu_mean = 0.177211316065872, 0.406726610288464
         else:
             nbinom_n_size, nbinom_mu_mean = 0.0536955764781434, 0.124289074773539
     elif self.user_class == 'LF':
         peak_hours_for_number_of_requests_lf = [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
         if self.hour in peak_hours_for_number_of_requests_lf:
             nbinom_n_size, nbinom_mu_mean = 0.480203280455517, 0.978733578849008
         else:
             nbinom_n_size, nbinom_mu_mean = 0.240591506072217, 0.487956906502501
     elif self.user_class == 'LO':
         peak_hours_for_number_of_requests_lo = [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
         if self.hour in peak_hours_for_number_of_requests_lo:
             nbinom_n_size, nbinom_mu_mean = 0.188551092877969, 0.111187768162793
         else:
             nbinom_n_size, nbinom_mu_mean = 0.0810585648991726, 0.0405013083716073
     else:
         raise Exception('The user class %s does not exist' % self.user_class)
     # From R's documentation: An alternative parametrization (often used in ecology) is by the
     #  _mean_ 'mu', and 'size', the _dispersion parameter_, where 'prob' = 'size/(size+mu)'
     nbinom_prob = nbinom_n_size / (nbinom_n_size + nbinom_mu_mean)
     return nbinom(nbinom_n_size, nbinom_prob)
Example #35
File: wig.py Project: hjanime/CSI
def density_nb( expanded, r, mean, strand ):
    '''
    expanded is the list of values.
    r, p follows the description in http://en.wikipedia.org/wiki/Negative_binomial_distribution
    mean = p*r/(1-p)
    mode = floor( p(r-1)/(1-p) )
    '''
    mean = float( mean )
    r = float(r)
    p = mean / (mean + r)
    mode = np.floor( p*(r-1)/(1-p ) )
    p = 1 - p #conform to the scipy definition
    nbinom = scist.nbinom( r, p )
    modep = nbinom.pmf( mode )
    factor = 1/modep
    leftwin = mode
    rightwin = mean * 3
    if strand == '-':
        temp = leftwin
        leftwin = rightwin
        rightwin = temp

    out = np.zeros_like( expanded )
    for i in range( expanded.shape[0] ):
        count = expanded[i]
        if count > 0:
            start = max( 0, i - leftwin )
            end = min( expanded.shape[0], i + rightwin )
            for j in range( int(start), int(end) ):
                k = j - i + mode
                if strand == '-':
                    k = mode - j + i
                out[ j ] += factor*count*nbinom.pmf( k )
    expanded.resize( 100000, refcheck=False)
    expanded.resize( 0, refcheck=False)

    return out
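A sketch checking the mode formula from the docstring against the pmf argmax, with illustrative r and mean (assumes scipy.stats is imported as scist, as in the source file):

import numpy as np
import scipy.stats as scist

r, mean = 4.0, 10.0
p = mean / (mean + r)
mode = np.floor(p * (r - 1) / (1 - p))
nb = scist.nbinom(r, 1 - p)
print(mode, np.argmax(nb.pmf(np.arange(200))))  # 7.0 and 7: same mode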
Example #36
 def likelihood(self, event_mark, dt):
     r = np.array(event_mark * self.r * event_mark.T)
     p = np.array(event_mark * self.p * event_mark.T)
     return stats.nbinom(r, 1. - p).pmf(dt.astype(int))
Example #37
 def dist(self, i, j):
     p = self.p[i, j]
     r = self.r[i, j]
     return stats.nbinom(r, 1. - p)
Example #38
# Poisson-distribution expected frequencies :
px         = poisson(num, mu)          # compute probabilities
ps_expfreq = px * len(ants)            # computes expected frequencies

#===============================================================================
# MLE estimation for negative-binomial distribution :

# Starting values for (r, p)
r0 = (mu + mu**2) / sigma**2
p0 = r0 / (mu + r0)

out = minimize(negbinlike, [r0, p0], args=(ants,), method='L-BFGS-B') 

r, p = out['x']                   # MLE

nbin       = nbinom(r, p)         # n-bin object
bx         = nbin.pmf(num)        # probabilities
nb_expfreq = bx * len(ants)       # expected frequency

#===============================================================================
# plotting :
fig      = figure()
ax       = fig.add_subplot(111)

ax.hist(ants, max(ants), color='0.4', histtype='stepfilled')
ax.plot(num + .5, nb_expfreq, 'ko', label='Neg-Bin expected freq')
ax.plot(num + .5, ps_expfreq, 'rs', label='Poisson expected freq')
ax.set_xlabel('# fireants per 50-meter square plot')
ax.set_ylabel('Frequency')
ax.set_title('Histogram of Fire-Ant Hill Counts')
ax.legend(loc='center right')
Example #39
def make_nbinom(mu, sigmasq):
  p = 1.0 - mu/sigmasq
  r = mu * (1.0-p) / p
  return nbinom(r,1-p)
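A sanity check of the moment matching above (illustrative values; requires sigmasq > mu):

d = make_nbinom(10.0, 25.0)
print(d.mean(), d.var())  # ~10.0, ~25.0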
Example #40
dist0 = stats.nbinom(n, p)
y = dist0.rvs(size=nobs)
x = np.ones(nobs)
"""
y = len_list
x = np.ones(len(len_list))

loglike_method = 'nb1'  # or use 'nb2'
res = sm.NegativeBinomial(y, x, loglike_method=loglike_method).fit(start_params=[0.1, 0.1])

#print dist0.mean()
print(res.params)

mu = res.predict()   # use this for mean if not constant
mu = np.exp(res.params[0])   # shortcut, we just regress on a constant
alpha = res.params[1]

if loglike_method == 'nb1':
    Q = 1
elif loglike_method == 'nb2':    
    Q = 0

size = 1. / alpha * mu**Q
prob = size / (size + mu)

#print 'data generating parameters', n, p
print('estimated params          ', size, prob)

#estimated distribution
dist_est = stats.nbinom(size, prob)
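A short follow-up sketch: whichever loglike_method was used, the converted (size, prob) pair should reproduce the fitted mean.

# sanity check (sketch): mean of the estimated distribution vs. the fitted mu
print(dist_est.mean(), mu)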
Example #41
classifier.likelihood('bad anchortext', bad_anchortext, p_ifyes=0.005, p_ifno=0.3)

def good_linkcontext(doc):
    pat = re.compile(r'penultimate|draft|forthcoming')
    return pat.search(doc.link.context.lower())
classifier.likelihood('good link context', good_linkcontext, p_ifyes=0.2, p_ifno=0.05)

def course_words(doc):
    # note: 'course' is also common in 'of course', 'in the course of',
    # 'essay' is common in discussions of Locke
    pat = re.compile(r'seminar|schedule|readings|textbook|students|handout|\bweek|hours/', re.I) 
    # normalize all measures to 10000 word documents (i.e., here we
    # return the number of matches per 10000 words):
    return int(len(pat.findall(doc.content)) * 10000 / doc.numwords)
classifier.likelihood('course note words', course_words,
                      p_ifyes=nbinom(1, 0.8), p_ifno=nbinom(2, 0.2))

def paper_words(doc):
    pat = re.compile(r'in section|finally,', re.I)
    return int(len(pat.findall(doc.content)) * 10000 / doc.numwords)
classifier.likelihood('typical paper words', paper_words,
                      p_ifyes=nbinom(2, 0.3), p_ifno=nbinom(1, 0.6))

def interview_words(doc):
    pat = re.compile(r'interview|do you', re.I)
    return int(len(pat.findall(doc.content)) * 10000 / doc.numwords)
classifier.likelihood('interview words', interview_words,
                      p_ifyes=nbinom(1, 0.8), p_ifno=nbinom(1, 0.2))

def verbs(doc):
    # bibliographies and other lists don't contain many verbs
Example #42
def in_beginning(regex):
    reg = re.compile(regex, re.I)
    def check(doc):
        if not doc.content:
            return Ellipsis
        beginning = doc.content[:5000]
        return reg.search(beginning)
    return check


# =========================================================================

bookfilter = BinaryNaiveBayes(prior_yes=0.2)

bookfilter.likelihood('numwords', length, 
                      p_ifyes=nbinom(7, 0.0001), p_ifno=nbinom(1, 0.0001))

# TODO: add more features? "Acknowledgements" section? Occurrences of
# "this book" TOC? Index? ...

# =========================================================================

chapterfilter = BinaryNaiveBayes(prior_yes=0.2)

chapterfilter.likelihood('numwords', length, 
                         p_ifyes=nbinom(2, 0.0002), p_ifno=nbinom(3, 0.0002))

chapterfilter.likelihood('"chapter" occurs in link context', in_context('chapter'),
                         p_ifyes=0.7, p_ifno=0.05)

# TODO: add features?