Example #1
    def scoreregex(self, r, depth=0):
        if depth < maxDepth:
            logp_regex = self.logp_regex
        else:
            logp_regex = self.logp_regex_no_recursion

        if r in self.character_classes:
            return logp_regex[r]
        else:
            R = type(r)
            p = logp_regex[R]
            if R == pre.String:
                return p + pre.Plus(pre.dot, p=0.3).match(r.arg)
            elif R == pre.Concat:
                n = len(r.values)
                return p + geom(0.8, loc=1).logpmf(n) + sum(
                    [self.scoreregex(s, depth=depth + 1) for s in r.values])
            elif R == pre.Alt:
                n = len(r.values)
                if all(x == r.ps[0] for x in r.ps):
                    param_score = math.log(1 / 2)
                else:
                    param_score = math.log(1 / 2) - (len(r.ps) + 1)  #~AIC
                return p + geom(0.8, loc=1).logpmf(n) + param_score + sum(
                    [self.scoreregex(s, depth=depth + 1) for s in r.values])
            elif R in [pre.KleeneStar, pre.Plus, pre.Maybe]:
                return p + self.scoreregex(r.val, depth=depth + 1)
Example #2
	def scoreregex(self, r, trace, depth=0):
		if depth==0:
			logp_regex = self.logp_regex_no_concepts
		elif depth==maxDepth:
			logp_regex = self.logp_regex_no_recursion if trace.baseConcepts else self.logp_regex_no_concepts_no_recursion
		else:
			logp_regex = self.logp_regex if trace.baseConcepts else self.logp_regex_no_concepts

		if type(r) is RegexWrapper and r.concept in trace.baseConcepts:
			return logp_regex[CONCEPT]# + trace.logpConcept(r.concept) deal with this in trace._addConcept
		elif r in self.character_classes:
			return logp_regex[r]
		else:
			R = type(r)
			p = logp_regex[R]
			if R == pre.String:
				return p + pre.Plus(pre.dot, p=0.3).match(r.arg)
			elif R == pre.Concat:
				n = len(r.values)
				return p + geom(0.8, loc=1).logpmf(n) + sum([self.scoreregex(s, trace, depth=depth+1) for s in r.values])
			elif R == pre.Alt:
				n = len(r.values)
				if all(x==r.ps[0] for x in r.ps):
					param_score = math.log(1/2)
				else:
					param_score = math.log(1/2) - (len(r.ps)+1) #~AIC
				return p + geom(0.8, loc=1).logpmf(n) + param_score + sum([self.scoreregex(s, trace, depth=depth+1) for s in r.values])
			elif R in [pre.KleeneStar, pre.Plus, pre.Maybe]:
				return p + self.scoreregex(r.val, trace, depth=depth+1)
Example #3
def generate(tau, theta):
    f = [stats.geom(theta[0]), stats.geom(theta[1])]
    while True:
        if np.random.uniform(0, 1) < tau[0]:
            dist = 0
        else:
            dist = 1
        yield f[dist].rvs(1)[0]
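A minimal usage sketch for the mixture generator above, assuming numpy and scipy.stats are imported under the same names used in the snippet; the parameter values are illustrative only:

import itertools
import numpy as np
from scipy import stats

# tau[0] is the mixing weight of the first component; theta holds the two success probabilities
gen = generate(tau=[0.7], theta=[0.3, 0.05])
draws = list(itertools.islice(gen, 10))  # take 10 values from the infinite generator
print(draws)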
Example #4
def recalc_log_gt_posteriors(log_gt_priors, down, up, p_geom, read_counts_array, nalleles, allele_sizes, diploid=False, norm=False):
    stutter_dist = geom(p_geom)
    nsamples     = read_counts_array.shape[0]
    log_down, log_eq, log_up = map(numpy.log, [down, 1-down-up, up])
    if diploid:
        num_gts = nalleles**2
        LLs = numpy.zeros((nsamples, num_gts)) + log_gt_priors
        gtind = 0
        for a1 in xrange(nalleles):
            for a2 in xrange(nalleles):
                if a1 != a2 and DEBUG_HAPLOID:
                    LLs[:,gtind] = numpy.log(0)
                    gtind += 1
                    continue
                step_probs1 = numpy.hstack(([log_down + stutter_dist.logpmf(abs(allele_sizes[x]-allele_sizes[a1])) for x in range(0, a1)],
                                            [log_eq],
                                            [log_up   + stutter_dist.logpmf(abs(allele_sizes[x]-allele_sizes[a1])) for x in range(a1+1, nalleles)]))
                step_probs2 = numpy.hstack(([log_down + stutter_dist.logpmf(abs(allele_sizes[x]-allele_sizes[a2])) for x in range(0, a2)],
                                            [log_eq],
                                            [log_up   + stutter_dist.logpmf(abs(allele_sizes[x]-allele_sizes[a2])) for x in range(a2+1, nalleles)]))
                step_probs = numpy.logaddexp(step_probs1+log_one_half, step_probs2+log_one_half)
                LLs[:,gtind] += numpy.sum(read_counts_array*step_probs, axis=1)
#                if a1 == a2: LLs[:,gtind]+= numpy.log(2) # account for phase
                gtind += 1
    else:
        LLs      = numpy.zeros((nsamples, nalleles)) + log_gt_priors
        for j in xrange(nalleles):
            step_probs = numpy.hstack(([log_down + stutter_dist.logpmf(abs(allele_sizes[x]-allele_sizes[j])) for x in range(0, j)],
                                       [log_eq],
                                       [log_up   + stutter_dist.logpmf(abs(allele_sizes[x]-allele_sizes[j])) for x in range(j+1, nalleles)]))
            LLs [:,j] += numpy.sum(read_counts_array*step_probs, axis=1)
    if norm: return numpy.sum(logsumexp(LLs, axis=1))
    else:
        log_samp_totals = logsumexp(LLs, axis=1)[numpy.newaxis].T
        return LLs - log_samp_totals
Example #5
def lag_distribution(durations, expo=0.4):
    binned = np.bincount(durations.duration)
    normalized_binned = binned.astype(float) / durations.duration.count()
    geom_dist = stats.geom(expo)
    expected_values = geom_dist.pmf(np.arange(len(normalized_binned)))
    df = pd.DataFrame({"Duration frequency": normalized_binned, "Expected values": expected_values})
    return df
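For context, a hedged usage sketch: the function only needs a DataFrame with a positive-integer "duration" column, so synthetic data works; the column name and parameter value here are assumptions for illustration:

import numpy as np
import pandas as pd

durations = pd.DataFrame({"duration": np.random.geometric(0.4, size=1000)})
df = lag_distribution(durations, expo=0.4)
print(df.head())  # observed duration frequencies next to the geom(0.4) pmf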
Example #6
    def construct_matrix(self, down, up, p_geom, min_allele, max_allele):
        self.log_down = numpy.log(down)
        self.log_eq = numpy.log(1.0 - down - up)
        self.log_up = numpy.log(up)
        self.p_geom = p_geom
        self.min_allele = min_allele
        self.max_allele = max_allele
        self.nalleles = self.max_allele - self.min_allele + 1
        self.stutter_dist = geom(self.p_geom)

        # Construct matrix where each row contains the stutter transition probabilities for a particular allele
        for j in xrange(self.nalleles):
            allele_probs = numpy.hstack(([
                self.log_down + self.stutter_dist.logpmf(j - x)
                for x in range(0, j)
            ], [self.log_eq], [
                self.log_up + self.stutter_dist.logpmf(x - j)
                for x in range(j + 1, self.nalleles)
            ]))
            if j == 0:
                step_probs = allele_probs
            else:
                step_probs = numpy.vstack((step_probs, allele_probs))
        if self.nalleles == 1:
            step_probs = numpy.expand_dims(step_probs, axis=0)
        self.step_probs = step_probs
Example #7
def plot_geometric_fit(data,
                       fit_results,
                       title=None,
                       x_label=None,
                       x_range=None,
                       y_range=None,
                       fig_size=(6, 5),
                       bin_width=1,
                       filename=None):
    """
    :param data: (numpy.array) observations
    :param fit_results: dictionary with keys "p" and "loc"
    :param title: title of the figure
    :param x_label: label to show on the x-axis of the histogram
    :param x_range: (tuple) x range
    :param y_range: (tuple) y range
        (the histogram shows the probability density so the upper value of y_range should be 1).
    :param fig_size: (tuple) figure size, e.g. (width, height) in inches
    :param bin_width: bin width
    :param filename: filename to save the figure as
    """

    plot_fit_discrete(data=data,
                      dist=stat.geom(p=fit_results['p'],
                                     loc=fit_results['loc']),
                      label='Geometric',
                      bin_width=bin_width,
                      title=title,
                      x_label=x_label,
                      x_range=x_range,
                      y_range=y_range,
                      fig_size=fig_size,
                      filename=filename)
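As a rough illustration of the expected inputs, one might fit p with the method-of-moments estimate 1/mean and pass it in fit_results; plot_fit_discrete comes from the surrounding library, and the data here are synthetic:

import numpy as np
from scipy import stats

data = stats.geom(p=0.25).rvs(size=500, random_state=0)   # synthetic observations
fit_results = {'p': 1.0 / np.mean(data), 'loc': 0}         # simple moment-based fit
plot_geometric_fit(data, fit_results, title='Geometric fit', x_label='Count')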
Example #8
def plot_means(d, params, fig, ax, repeat, size=200):

    result = None
    means = []

    for i in range(repeat):

        if d == 'Poisson':
            result = stats.poisson(**params).rvs(size)
            means.append(result.mean())
        elif d == 'Binomial':
            result = stats.binom(**params).rvs(size)
            means.append(result.mean())
        elif d == 'Exponential':
            result = stats.expon(**params).rvs(size)
            means.append(result.mean())
        elif d == 'Geometric':
            result = stats.geom(**params).rvs(size)
            means.append(result.mean())
        elif d == 'Uniform':
            result = stats.uniform(**params).rvs(size)
            means.append(result.mean())

    ax = fig.add_subplot(ax[0], ax[1], ax[2])
    ax.hist(means, bins=100)
    ax.set_title(f"{d}-repeat:{repeat}-size:{size}")
Example #9
def initGeometric(init_strat, bias):
    if isinstance(init_strat, AugurDefault):
        return 1
    elif isinstance(init_strat, AugurRandom):
        return sps.geom(bias).rvs()
    else:
        raise ValueError("unsupported initialization strategy: %r" % (init_strat,))
Example #10
def probability_of_exiting(states, n_tests):
    score = []
    if states.finite_horizon:
        game_horizon = states.time_horizon
    elif states.infinite_horizon_discounted:
        time_horizon_distribution = geom(p=1 / states.lifetime_mean)

    for _ in range(n_tests):

        if states.infinite_horizon_discounted:
            game_horizon = time_horizon_distribution.rvs()
        state = states.initial_state
        t = 0
        while True:
            if state.losing():
                score.append(False)
                break
            elif state.winning():
                score.append(True)
                break
            elif t == game_horizon - 1:
                score.append(False)
                break

            if states.finite_horizon:
                action = state.player.action[t]
            else:
                action = state.player.action

            state = choice(states.possible_next_states(state, action))
            t += 1

    return sum(score) / n_tests
Example #11
 def get_param_distributions(self, X, y):
     return super().get_param_distributions({
         'polynomialfeatures__degree': [1, 2],
         'pca__n_components':
         list(range(1, X.shape[1])),
         'randomforestclassifier__n_estimators':
         geom(1. / 100)
     })
Example #12
 def get_param_distributions(self, X, y):
     return super().get_param_distributions({
         'kernelridge__alpha':
         expon(0, 1),
         'kernelridge__degree':
         geom(.5, loc=1),
         'kernelridge__kernel': ['linear', 'poly', 'rbf', 'laplacian']
     })
Example #13
 def get_param_distributions(self, X, y):
     return super().get_param_distributions({
         'svr__C':
         expon(0, 1),
         'svr__degree':
         geom(.3),
         'svr__kernel': ['linear', 'poly', 'rbf'],
     })
Example #14
 def create_distribution(self):
     """Creates the CP distribution using the object properties. 
     
     NOTE: At this point, it just operates with the geometric distribution.
     Once g and g_0 are allowed as input, this function handles the more
     general case, too."""
     
     return stats.geom(self.intensity)
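For reference, the frozen object returned above exposes the usual scipy.stats interface; a small sketch assuming intensity = 0.1:

from scipy import stats

cp_prior = stats.geom(0.1)        # stand-in for stats.geom(self.intensity)
print(cp_prior.pmf([1, 2, 3]))    # P(k) = (1 - 0.1)**(k - 1) * 0.1
print(cp_prior.mean())            # expected run length, 1 / 0.1 = 10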
Example #15
 def get_param_distributions(self, X, y):
     return super().get_param_distributions({
         'polynomialfeatures__degree': [1, 2],
         'pca__n_components':
         list(range(1, X.shape[1])),
         'adaboostregressor__n_estimators':
         geom(1. / 2**5)
     })
Example #16
    def __init__(self, p: float) -> None:
        """
        Constructor for an object of type geometric.
        """

        super().__init__(a_dist=stats.geom(p))
        self._lower = 1
        self._p = p
Example #17
 def get_param_distributions(self, X, y):
     return super().get_param_distributions({
         'polynomialfeatures__degree': [1, 2],
         'pca__n_components':
         _get_n_pca_components_distribution(X),
         'adaboostregressor__n_estimators':
         geom(1. / 2**5)
     })
Example #18
def recalc_stutter_params(log_gt_posteriors,
                          read_counts,
                          nalleles,
                          allele_sizes,
                          down,
                          up,
                          pgeom,
                          max_stutter,
                          diploid=False):
    # Pre-calculate stutter probabilities for old model
    stutter_dist = geom(pgeom)
    stutter_probs = [stutter_dist.logpmf(i) for i in range(1, max_stutter + 1)]
    # Set up counts
    nsamples = log_gt_posteriors.shape[0]
    log_counts = [[0], [0], [0]]  # Pseudocounts
    log_diffs = [0, numpy.log(2)]  # Step sizes of 1 and 2, so that p_geom < 1
    if diploid:
        for i in xrange(nsamples):
            gtind = 0
            for a1 in xrange(nalleles):
                for a2 in xrange(nalleles):
                    log_post = log_gt_posteriors[i][gtind]
                    #                    print i, down, up, pgeom, (allele_sizes[a1], allele_sizes[a2]), numpy.exp(log_post), dict([(allele_sizes[r], read_counts[i][r]) for r in read_counts[i]])
                    for read_index, count in read_counts[i].items():
                        log_count = numpy.log(count)
                        diff1 = allele_sizes[read_index] - allele_sizes[a1]
                        diff2 = allele_sizes[read_index] - allele_sizes[a2]
                        phase_posts = GetReadPhasePosts(allele_sizes[a1], allele_sizes[a2], \
                                                            allele_sizes[read_index], down, up, stutter_probs)
                        diffs = [diff1, diff2]
                        #                        print allele_sizes[read_index], allele_sizes[a1], allele_sizes[a2], diffs, numpy.exp(phase_posts), numpy.exp(log_post)
                        for j in range(len(diffs)):
                            if diffs[j] != 0:
                                log_diffs.append(log_count + log_post +
                                                 phase_posts[j] +
                                                 numpy.log(abs(diffs[j])))
                            log_counts[numpy.sign(diffs[j]) +
                                       1].append(log_post + phase_posts[j] +
                                                 log_count)
                    gtind += 1
    else:
        for i in xrange(nsamples):
            for j in xrange(nalleles):
                log_post = log_gt_posteriors[i][j]
                for read_index, count in read_counts[i].items():
                    log_count = numpy.log(count)
                    diff = allele_sizes[read_index] - allele_sizes[j]
                    if diff != 0:
                        log_diffs.append(log_count + log_post +
                                         numpy.log(abs(diff)))
                    log_counts[numpy.sign(diff) + 1].append(log_post +
                                                            log_count)
    log_tot_counts = map(logsumexp, log_counts)
    p_hat = numpy.exp(
        logsumexp([log_tot_counts[0], log_tot_counts[2]]) -
        logsumexp(log_diffs))
    log_freqs = log_tot_counts - logsumexp(log_tot_counts)
    return numpy.exp(log_freqs[0]), numpy.exp(log_freqs[2]), p_hat
Example #19
 def get_param_distributions(self, X, y):
     return super().get_param_distributions({
         'polynomialfeatures__degree': [1, 2],
         'pca__n_components':
         list(range(1, X.shape[1])),
         'kneighborsregressor__n_neighbors':
         geom(1 / (.05 * X.shape[0])),
         'kneighborsregressor__weights': ['uniform', 'distance']
     })
Example #20
 def get_param_distributions(self, X, y):
     return super().get_param_distributions({
         'polynomialfeatures__degree': [1, 2],
         'pca__n_components':
         _get_n_pca_components_distribution(X),
         'kneighborsclassifier__n_neighbors':
         geom(1 / (.05 * X.shape[0])),
         'kneighborsclassifier__weights': ['uniform', 'distance']
     })
Example #21
def lag_distribution(durations, expo=0.4):
    binned = np.bincount(durations.duration)
    normalized_binned = (binned.astype(float) / durations.duration.count())
    geom_dist = stats.geom(expo)
    expected_values = geom_dist.pmf(np.arange(len(normalized_binned)))
    df = pd.DataFrame({
        'Duration frequency': normalized_binned,
        'Expected values': expected_values
    })
    return df
Example #22
 def test_rvs(self):
     vals = stats.geom.rvs(0.75, size=(2, 50))
     assert numpy.all(vals >= 0)
     assert numpy.shape(vals) == (2, 50)
     assert vals.dtype.char in typecodes["AllInteger"]
     val = stats.geom.rvs(0.75)
     assert isinstance(val, int)
     val = stats.geom(0.75).rvs(3)
     assert isinstance(val, numpy.ndarray)
     assert val.dtype.char in typecodes["AllInteger"]
Example #23
def test_entropy():
    """
    Test entropy.
    """
    geom_benchmark = stats.geom(0.7)
    expect_entropy = geom_benchmark.entropy().astype(np.float32)
    entropy = EntropyH()
    output = entropy()
    tol = 1e-6
    assert (np.abs(output.asnumpy() - expect_entropy) < tol).all()
Example #24
 def test_rvs(self):
     vals = stats.geom.rvs(0.75, size=(2, 50))
     assert_(numpy.all(vals >= 0))
     assert_(numpy.shape(vals) == (2, 50))
     assert_(vals.dtype.char in typecodes['AllInteger'])
     val = stats.geom.rvs(0.75)
     assert_(isinstance(val, int))
     val = stats.geom(0.75).rvs(3)
     assert_(isinstance(val, numpy.ndarray))
     assert_(val.dtype.char in typecodes['AllInteger'])
Example #25
def get_iti_distribution(n_trials, p, rs):
    """Return a vector of ITIs (in TR units) for each trial."""
    x = np.arange(*p.eff_geom_support)
    iti_pmf = stats.geom(p.eff_geom_p, loc=p.eff_geom_loc).pmf(x)
    iti_counts = np.round((iti_pmf / iti_pmf.sum()) * n_trials)
    iti_counts[0] += (n_trials - iti_counts.sum())

    iti_trs = [np.repeat(x_i, c) for x_i, c in zip(x, iti_counts)]
    iti_trs = np.concatenate(iti_trs)
    return iti_trs
Example #26
def gencsr(shape=(NUMNODES, NUMNODES), density=0.05, fname="random_graphs/"):
    print("generating density: ", density)
    MAX_IN_SHARD = int(10**8.7)  #cannot do 1e9
    numpoints2gen = MAX_IN_SHARD
    # DEBUG statements
    # numpoints2gen = 15
    # shape=(6,6)
    # density=0.5
    # numpoints2gen = int(shape[0]*(shape[0]-1)/2)
    # END DEBUG
    actualnumpoints = int(shape[0] * (shape[0] - 1) / 2)
    d = geom(density)
    points = d.rvs((numpoints2gen, )).astype(
        np.int64)  # TODO I would do uint64 but it is not supported on GPU

    # note, this only generates values strictly above the diagonal
    mvalue = shape[0] - 1
    incrs = np.ones(mvalue) * mvalue - np.arange(mvalue)
    row_dense_upper_starts = np.zeros(shape[0] + 1).astype(np.int64)
    row_dense_upper_starts[1:-1] = incrs.cumsum()
    row_dense_upper_starts[-1] = row_dense_upper_starts[
        -2]  # monkey patching because we need there to be a sentinel element

    points[0] -= 1
    points_dense_upper_idx = points.cumsum()
    lastreal = np.searchsorted(points_dense_upper_idx,
                               row_dense_upper_starts[-3] + 1)
    points_dense_upper_idx = points_dense_upper_idx[:lastreal]
    print(lastreal / actualnumpoints)

    row_csr_starts = np.searchsorted(points_dense_upper_idx,
                                     row_dense_upper_starts).astype(np.int64)

    point2row = (
        np.searchsorted(row_dense_upper_starts, points_dense_upper_idx + 1) -
        1).astype(np.int64)
    col_csr = points_dense_upper_idx - row_dense_upper_starts[point2row]
    col_csr += np.arange(shape[0]).astype(np.int64)[point2row] + 1

    with open(fname + str(shape[0]) + '_' + str(density) + "csr_cols.bin",
              "wb") as f:
        f.write(col_csr.tobytes())

    with open(fname + str(shape[0]) + '_' + str(density) + "csr_rows.bin",
              "wb") as f:
        f.write(row_csr_starts.tobytes())

    checkcsr(col_csr, row_csr_starts, shape)

    # print("lengh of row_csr_starts",row_csr_starts.shape)
    # print(row_csr_starts)
    # pdb.set_trace()

    return (row_csr_starts, col_csr)
Example #27
 def get_param_distributions(self, X, y):
     return super().get_param_distributions({
         'polynomialfeatures__degree': [1, 2],
         'pca__n_components':
         _get_n_pca_components_distribution(X),
         'svc__C':
         expon(0, 1),
         'svc__degree':
         geom(.3),
         'svc__kernel': ['linear', 'poly', 'rbf'],
     })
Example #28
def other_Q2(S0, p, mu0, sigma0, mu1, sigma1, rho, r, b, gamma, delta, n0, N0):

    burn_in = n0
    num_sample = N0

    z = sct.norm.ppf(1 - delta / 2)  # 1 - delta/2 quantile of N(0, 1)
    r_star = 1 - pow(2, -1.5)  # optimal success rate for the geometric of N

    confidence_interval = float('inf')
    running_mean = 0
    running_2moment = 0
    num_estimator = 0  #count of number of estimators generated

    CIs = np.zeros((1, num_sample))
    estimation = np.zeros((1, num_sample))

    while (num_estimator < num_sample or confidence_interval >= delta):
        N = np.random.geometric(p=r_star)
        samples = sampler(riskless, risky, N, S0, n0, p, mu0, sigma0, mu1,
                          sigma1, rho, r, b)
        samples_odd = samples[0::2]
        samples_even = samples[1::2]
        samples_n_0 = samples[0:pow(2, n0)]

        theta_N = np.mean(samples)
        theta_N_odd = np.mean(samples_odd)
        theta_N_even = np.mean(samples_even)
        theta_n_0 = np.mean(samples_n_0)

        X_star = (theta_N - (theta_N_odd + theta_N_even) /
                  2) / sct.geom(r_star).pmf(N + 1) + theta_n_0
        running_mean = (running_mean * num_estimator +
                        X_star) / (num_estimator + 1)
        running_2moment = (running_2moment * num_estimator +
                           pow(X_star, 2)) / (num_estimator + 1)

        sample_std = math.sqrt(running_2moment - pow(running_mean, 2))
        num_estimator = num_estimator + 1
        confidence_interval = z * sample_std / (math.sqrt(num_estimator))
        estimation[:, num_estimator - 1] = running_mean
        CIs[:, num_estimator - 1] = confidence_interval

    lower = estimation - CIs
    upper = estimation + CIs
    print('Generate', num_estimator, 'samples \n')

    n_range = np.arange(burn_in - 1, num_sample)
    plt.plot(n_range, estimation[0, n_range], label='estimation')
    plt.plot(n_range, lower[0, n_range], label='lower CI')
    plt.plot(n_range, upper[0, n_range], label='upper CI')
    plt.legend(loc='upper right')
    plt.show()

    return running_mean, confidence_interval
Example #29
def Geometric(p, tag=None):
    """
    A Geometric random variate
    
    Parameters
    ----------
    p : scalar
        The probability of success
    """
    assert 0<p<1, 'Geometric probability "p" must be between zero and one, non-inclusive'
    return uv(ss.geom(p), tag=tag)
Example #30
def Geometric(p, tag=None):
    """
    A Geometric random variate
    
    Parameters
    ----------
    p : scalar
        The probability of success
    """
    assert 0 < p < 1, 'Geometric probability "p" must be between zero and one, non-inclusive'
    return uv(ss.geom(p), tag=tag)
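A minimal usage sketch, assuming the surrounding uncertainty-propagation module where uv and ss (scipy.stats) are already imported; the tag is optional metadata:

g = Geometric(0.3, tag='retries')

# For comparison, the underlying frozen scipy distribution directly:
import scipy.stats as ss
print(ss.geom(0.3).mean(), ss.geom(0.3).var())  # 1/p and (1-p)/p**2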
Example #31
 def get_param_distributions(self, X, y):
     return super().get_param_distributions({
         'polynomialfeatures__degree': [1, 2],
         'pca__n_components':
         list(range(1, X.shape[1])),
         'svr__C':
         expon(0, 1),
         'svr__degree':
         geom(.3),
         'svr__kernel': ['linear', 'poly', 'rbf'],
     })
Example #32
 def get_param_distributions(self, X, y):
     return super().get_param_distributions({
         'polynomialfeatures__degree': [1, 2],
         'pca__n_components':
         list(range(1, X.shape[1])),
         'kernelridge__alpha':
         expon(0, 1),
         'kernelridge__degree':
         geom(.5, loc=1),
         'kernelridge__kernel': ['linear', 'poly', 'rbf', 'laplacian']
     })
Example #33
def test_log_likelihood():
    """
    Test log_pmf.
    """
    geom_benchmark = stats.geom(0.7)
    expect_logpmf = geom_benchmark.logpmf([1, 2, 3, 4, 5]).astype(np.float32)
    logprob = LogProb()
    x_ = Tensor(np.array([0, 1, 2, 3, 4]).astype(np.int32),
                dtype=dtype.float32)
    output = logprob(x_)
    tol = 1e-6
    assert (np.abs(output.asnumpy() - expect_logpmf) < tol).all()
Example #34
 def test_NBinom_to_Geometric(self):
     exp_list, obs_list = [], []
     X = NegativeBinomial(r=1, p=0.8)
     sims = X.sim(Nsim)
     simulated = sims.tabulate()
     for k in range(10):
         expected = Nsim * stats.geom(p=0.8).pmf(k)
         if expected > 5:
             exp_list.append(expected)
             obs_list.append(simulated[k])
     pval = stats.chisquare(obs_list, exp_list).pvalue
     self.assertTrue(pval > 0.01)
Example #35
def UpdateK(k_old,z_old,T,lambda_old,rou,u,phi,alpha,iterNum,eta):
	k_new =  []
	z_new =  []
	for t in range(T-1):
		dK = stats.binom(1,0.5)
		temp = dK.rvs(1)
		d_k = 0
		if temp[0] ==0:
			d_k = 1
		else:
			d_k = -1

		epsilon_K = stats.geom(1.0/(1+z_old[t]))
		epsilon = epsilon_K.rvs(1)

		k_new_temp = k_old[t]+d_k*epsilon[0]


		# step 3
		if k_new_temp < 0:
			k_new.append(k_old[t])

			z_new.append(z_old[t])

		else:
			p_k = (lambda_old/((1-rou)*u))**k_old[t]*(phi[t]*lambda_old*rou/((1-rou)*u))**k_old[t]*\
		      phi[t+1]/(math.factorial(k_old[t])*spec.gamma(lambda_old+k_old[t]))

			p_k_new = (lambda_old/((1-rou)*u))**k_new_temp*(phi[t]*lambda_old*rou/((1-rou)*u))**k_new_temp*\
		          phi[t+1]/(math.factorial(k_new_temp)*spec.gamma(lambda_old+k_new_temp))

			ap = min(1,p_k_new/p_k)

			y_AP = stats.binom(1,ap)
			temp = y_AP.rvs(1)

			if temp[0] ==0:
				k_new.append(k_new_temp)
			else:
				k_new.append(k_old[t])

			temp_z = z_old[t]+ iterNum**(-1.0*eta)*(ap-alpha)
			z_new.append(temp_z)

		# step 4

	print "k z new:\n"
	print k_new
	print z_new
	print "\n"

	return  k_new, z_new
Example #36
def UpdateK_sigma(k_sigma_old,z_sigma_old,lambda_sigma,rou_sigma,u_sigma,sigma_2,iterNum,eta,alpha,T):

	k_sigma_new = []
	z_sigma_new = []

	for t in range(T-1):

		dK = stats.binom(1,0.5)
		temp = dK.rvs(1)
		d_k = 0
		if temp[0] ==0:
			d_k = 1
		else:
			d_k =-1

		epsilon_K = stats.geom(1.0/(1+z_sigma_old[t]))
		epsilon = epsilon_K.rvs(1)

		k_sigma_new_temp = k_sigma_old[t]+d_k*epsilon[0]

		# step 3
		if k_sigma_new_temp < 0:
			k_sigma_new.append(k_sigma_old[t])
			z_sigma_new.append(z_sigma_old[t])
		else:
			# step 2
			p_k_sigma = (lambda_sigma/((1-rou_sigma)*u_sigma))**k_sigma_old[t]*\
		            (sigma_2[t]*lambda_sigma*rou_sigma/((1-rou_sigma)*u_sigma))**k_sigma_old[t]\
		            *(sigma_2[t+1])**k_sigma_old[t]/(math.factorial(k_sigma_old[t])*spec.gamma(lambda_sigma+k_sigma_old[t]))

			p_k_sigma_new = (lambda_sigma/((1-rou_sigma)*u_sigma))**k_sigma_new_temp*\
		            (sigma_2[t]*lambda_sigma*rou_sigma/((1-rou_sigma)*u_sigma))**k_sigma_new_temp\
		            *(sigma_2[t+1])**k_sigma_new_temp/(math.factorial(k_sigma_new_temp)*spec.gamma(lambda_sigma+k_sigma_new_temp))


			ap = min(1,p_k_sigma_new/p_k_sigma)

			y_AP = stats.binom(1,ap)
			temp = y_AP.rvs(1)
			if temp[0] ==0:
				k_sigma_new.append(k_sigma_new_temp)
			else:
				k_sigma_new.append(k_sigma_old[t])

			# step 4
			temp_z = z_sigma_old[t]+ iterNum**(-1.0*eta)*(ap-alpha)
			z_sigma_new.append(temp_z)



	return  k_sigma_new,z_sigma_new
Example #37
    def test_pageout_maintains_size(self):
        self.uut = MMCPolicyOne(
                cache_size_limit=10, full_cache_size_limit=20,
                trace_size_limit=15)
        g = stats.geom(0.05)
        # Fill up the cache.
        for page in range(100):
            self.uut.request(page)

        # Request some cache hits and some cache misses.
        for page in range(5) + range(100, 105):
            self.uut.request(page)
            self.assertEqual(len(self.uut.cache_list), 10)
            self.assertEqual(len(self.uut.full_cache), 20)
            self.assertEqual(len(self.uut.trace), 15)
Example #38
    def test_normalizations(self):
        emissions = np.ones((3, 7))
        tmat = np.eye(3)
        durations = [geom(0.3)] * 3
        support_cutoff = 2

        hsmm = HSMMModel(
            MultinomialEmissions(emissions),
            durations, tmat, support_cutoff=support_cutoff
        )

        expected_durations = np.empty((3, 2))
        expected_durations[:, 0] = 0.58823529
        expected_durations[:, 1] = 0.41176471
        np.testing.assert_array_almost_equal(
            hsmm._durations, expected_durations
        )
Example #39
    def __init__(self, obs_pts=None, cmplx=None,
                 gamma=.9, lmbda=.2, use_gp=True,
                 obs_sigma=OBS_SIGMA, propose_sigma=.0005, birth_sigma=.1,
                 d=2, obs=None, N=None, P=None, n_clusters_init=5):
        """
        gamma: geometric variable for prior on number of simplices
        sigma_sq: variance of 
        d: dimension of embedding space
        """

        assert not (obs_pts is None and cmplx is None)

        self.gamma = gamma
        self.N_prior = geom(gamma)

        self.d = d
        self.lmbda = lmbda
        self.len_prior = expon(self.lmbda)

        self.propose_mvn = mvn(np.zeros(self.d), propose_sigma*np.eye(self.d))
        self.obs_sigma=obs_sigma
        self.obs_dist = norm(loc=0, scale=obs_sigma)

        self.birth_proposal = norm(loc=0, scale=birth_sigma)

        self.use_gp = use_gp

        self.cmplx = cmplx
        if self.cmplx is None:
            # obs_pts is not None
            self.cmplx = SimplicialComplex()
            ## this is a 1d complex
            self.cmplx.initialize(obs_pts, 1, n_clusters=n_clusters_init)

        self.N = self.cmplx.simplex_count()

        if obs_pts is None:
#            self.sample_obs(self.N * 10)
            self.sample_obs(self.N * 100)
        else:
            self.observations = []
            for pt in obs_pts:
                self.observations.append(Obs(pt, self.cmplx))
Example #40
def recalc_stutter_params(log_gt_posteriors, read_counts, nalleles, allele_sizes, down, up, pgeom, max_stutter, diploid=False):
    # Pre-calculate stutter probabilities for old model
    stutter_dist = geom(pgeom)
    stutter_probs = [stutter_dist.logpmf(i) for i in range(1, max_stutter+1)]
    # Set up counts
    nsamples   = log_gt_posteriors.shape[0]
    log_counts = [[0], [0], [0]]   # Pseudocounts
    log_diffs  = [0, numpy.log(2)] # Step sizes of 1 and 2, so that p_geom < 1 
    if diploid:
        for i in xrange(nsamples):
            gtind = 0
            for a1 in xrange(nalleles):
                for a2 in xrange(nalleles):
                    log_post = log_gt_posteriors[i][gtind]
#                    print i, down, up, pgeom, (allele_sizes[a1], allele_sizes[a2]), numpy.exp(log_post), dict([(allele_sizes[r], read_counts[i][r]) for r in read_counts[i]])
                    for read_index, count in read_counts[i].items():
                        log_count = numpy.log(count)
                        diff1 = allele_sizes[read_index]-allele_sizes[a1]
                        diff2 = allele_sizes[read_index]-allele_sizes[a2]
                        phase_posts = GetReadPhasePosts(allele_sizes[a1], allele_sizes[a2], \
                                                            allele_sizes[read_index], down, up, stutter_probs)
                        diffs = [diff1, diff2]
#                        print allele_sizes[read_index], allele_sizes[a1], allele_sizes[a2], diffs, numpy.exp(phase_posts), numpy.exp(log_post)
                        for j in range(len(diffs)):
                            if diffs[j] != 0:
                                log_diffs.append(log_count+log_post+phase_posts[j]+numpy.log(abs(diffs[j])))
                            log_counts[numpy.sign(diffs[j])+1].append(log_post+phase_posts[j]+log_count)
                    gtind += 1
    else:
        for i in xrange(nsamples):
            for j in xrange(nalleles):
                log_post = log_gt_posteriors[i][j]
                for read_index,count in read_counts[i].items():
                    log_count = numpy.log(count)
                    diff      = allele_sizes[read_index] - allele_sizes[j] 
                    if diff != 0:
                        log_diffs.append(log_count + log_post + numpy.log(abs(diff)))
                    log_counts[numpy.sign(diff)+1].append(log_post + log_count)
    log_tot_counts = map(logsumexp, log_counts)
    p_hat          = numpy.exp(logsumexp([log_tot_counts[0], log_tot_counts[2]]) - logsumexp(log_diffs))
    log_freqs      = log_tot_counts - logsumexp(log_tot_counts)
    return numpy.exp(log_freqs[0]), numpy.exp(log_freqs[2]), p_hat
Example #41
def recDistribution():
    data = loadFeatures()
    recs = array(data['recs'], float)
    mu = numpy.average(recs)
    print mu
    dist = poisson(mu)
    dist2 = geom((1.0/mu))
    dist3 = pareto(recs)
    x = numpy.arange(1, numpy.amax(recs))
    h = plt.hist(recs, bins=range(40), normed=True)
    plt.plot(dist3[0], dist3[1], color='yellow', label='Pareto', linewidth=3)
    plt.plot(x, dist.pmf(x), color='black', label='Poisson', linewidth=3)
    plt.plot(x, dist2.pmf(x), color='red', label='Geometric', linewidth=3)
    plt.legend()
    plt.xlabel('Recommendation Count')
    plt.ylabel('Actual Value (% of Data) / Probability')
    plt.legend()
    plt.suptitle('Fitting Rec. Count')
    plt.xlim(0,40)
    plt.show()
Example #42
    def __call__(self, options, pars):
        """Simulate process model to get predicted
        choice and sample size distributions"""

        start = time()

        N = pars.get('N', 500000)
        max_T = pars.get('max_T', 500)
        minsamplesize = pars.get('minsamplesize', 1) - 1
        p_stop_geom = pars.get('p_stop_geom', 0)
        fixed_dist = geom(p_stop_geom, loc=(minsamplesize - 1))

        outcomes = pars['obs']['outcomes']
        samplesize = outcomes.shape[0]
        fixed_p_stop = fixed_dist.pmf(samplesize - 1)

        p_stop_choose_A = fixed_p_stop * 0.5
        p_stop_choose_B = fixed_p_stop * 0.5

        return {'p_stop_choose_A': p_stop_choose_A,
                'p_stop_choose_B': p_stop_choose_B}
Example #43
    def construct_matrix(self, down, up, p_geom, min_allele, max_allele):
        self.log_down     = numpy.log(down)
        self.log_eq       = numpy.log(1.0-down-up)
        self.log_up       = numpy.log(up)
        self.p_geom       = p_geom
        self.min_allele   = min_allele
        self.max_allele   = max_allele
        self.nalleles     = self.max_allele - self.min_allele + 1
        self.stutter_dist = geom(self.p_geom)

        # Construct matrix where each row contains the stutter transition probabilities for a particular allele
        for j in xrange(self.nalleles):
            allele_probs = numpy.hstack(([self.log_down + self.stutter_dist.logpmf(j-x) for x in range(0, j)],
                                         [self.log_eq],
                                         [self.log_up   + self.stutter_dist.logpmf(x-j) for x in range(j+1, self.nalleles)]))
            if j == 0:
                step_probs = allele_probs
            else:
                step_probs = numpy.vstack((step_probs, allele_probs))
        if self.nalleles == 1:
            step_probs = numpy.expand_dims(step_probs,axis=0)
        self.step_probs = step_probs
Example #44
# -*- coding: utf-8 -*-

# By Vamei

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import geom
rv = geom(0.45)

x = np.arange(-1, 15, 1)
y = rv.pmf(x)

plt.bar(x-0.2, y, width=0.4)

plt.ylim([0, 0.5])
plt.title("geometric distribution")
plt.xlabel("RV")
plt.ylabel("P(X=x)")
plt.show()
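A complementary sketch in the same style, overlaying the CDF of the same geom(0.45) variable, using only scipy and matplotlib calls already shown in these examples:

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import geom

rv = geom(0.45)
x = np.arange(1, 15)
plt.step(x, rv.cdf(x), where='post')  # cumulative probability P(X <= x)
plt.title("geometric distribution CDF")
plt.xlabel("x")
plt.ylabel("P(X<=x)")
plt.show()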
Example #45
from termcolor import colored,cprint
import matplotlib.pyplot as plt

import numpy as np
from scipy.stats import geom
# Here set up the parameters for the geometric distribution. 

p = 0.5
dist = geom(p)
# Set up the support points (the geometric distribution is discrete, so use integers).
x = np.arange(1, 11)
# Retrieving geom's PMF and CDF 
pmf = dist.pmf(x)
cdf = dist.cdf(x)
# Here we draw out 500 random variates from the distribution.
samples = dist.rvs(size=500)
print( colored( x,'green'),colored(dist,'red'), colored(pmf,'blue'),colored(cdf,'red'))
Example #46
 def func(self, x):
     p = self.p
     return geom(p).pmf(x)
Example #47
 def sample(self, N=None):
     p = self.p
     return geom(p).rvs(size=N, random_state=self.random)
Example #48
    def __call__(self, options, pars, trackobs=True):
        """Simulate process model to get predicted
        choice and sample size distributions"""

        start = time()

        N = pars.get('N', 500)
        max_T = pars.get('max_T', 100)

        minsamplesize = pars.get('minsamplesize', 1) - 1

        p_sample_H = pars.get('p_sample_H', .5)
        p_sample_L = 1 - p_sample_H

        if self.stopdist == 'fixed-T':
            stop_T = pars.get('stop_T', 2)
            fixed_dist = randint(stop_T, stop_T+1)
        elif self.stopdist == 'geometric':
            p_stop = pars.get('p_stop', 0)
            fixed_dist = geom(p_stop, loc=(minsamplesize - 1))



        if 'obs' in pars:

            # assume a single sequence of known observations
            sampled_option = pars['obs']['sampled_option']
            outcomes = pars['obs']['outcomes']
            max_T = outcomes.shape[0]
            fixed_p_stop = fixed_dist.pmf(max_T - 1)

            opt_exp = []
            for i, opt in enumerate(options):
                opt_exp_i = []
                for j, x in enumerate(opt):
                    ind = np.where((sampled_option==i) & (outcomes==x[0]))[0]
                    n = float(len(np.where(sampled_option==i)[0]))
                    if n > 0:
                        opt_exp_i.append([x[0], len(ind)/n])
                    else:
                        opt_exp_i.append([x[0], 0])
                opt_exp_i = np.array(opt_exp_i)

                # assume single observation of zero
                if opt_exp_i[:,1].sum()==0:
                    zero = np.where(opt_exp_i[:,0]==0)[0][0]
                    opt_exp_i[zero,1] = 1
                opt_exp.append(opt_exp_i)
            opt_exp = np.array(opt_exp)

            # compute value and attentional weights for
            # each outcome
            weights = np.array([cpt.pweight_prelec(option, pars) for i, option in enumerate(opt_exp)])
            values = np.array([cpt.value_fnc(option[:,0], pars) for option in opt_exp])

            # choice function
            s = pars.get('s', 1.) # softmax temp
            vL, vH = [np.dot(w, v) for (w, v) in zip(weights, values)]
            cp = np.exp(vH * s) / (np.exp(vH * s) + np.exp(vL * s))

            p_stop_choose_A = fixed_p_stop * (1 - cp)
            p_stop_choose_B = fixed_p_stop * cp

            return {'p_stop_choose_A': p_stop_choose_A,
                    'p_stop_choose_B': p_stop_choose_B,
                    'cp_B': cp}

        else:


            values = np.array([cpt.value_fnc(option[:,0], pars) for option in options])


            # apply a fixed sample size
            samplesize = fixed_dist.rvs(size=N)
            max_T = samplesize.max()

            sampled_option = np.zeros((N, max_T), int)


            sampled_option = np.random.choice([0,1],
                                              p=[p_sample_L, p_sample_H],
                                              size=(N, max_T))

            # assume 2nd sample is from other option
            sampled_option[:,1] = np.abs(1 - sampled_option[:,0])
            sampled_A = sampled_option==0
            sampled_B = sampled_option==1


            # observation matrix
            observed = np.zeros((N, max_T))
            observed_A = np.random.choice(range(options[0].shape[0]),
                                          size=sampled_A.sum(),
                                          p=options[0][:,1])

            observed_B = np.random.choice(range(options[1].shape[0]),
                                          size=sampled_B.sum(),
                                          p=options[1][:,1])
            observed[sampled_A] = observed_A
            observed[sampled_B] = observed_B

            # outcomes experienced
            obj_outcomes = options[:,:,0]
            outcomes = np.zeros((N, max_T))
            outcomes[sampled_A] = obj_outcomes[0][observed_A]
            outcomes[sampled_B] = obj_outcomes[1][observed_B]


            # get relative frequencies
            wopt = deepcopy(options)
            wopt[:,:,0] = values

            choice = []
            for it in range(N):

                sampled_option_i = sampled_option[it,:(samplesize[it]+1)]
                outcomes_i = outcomes[it,:(samplesize[it]+1)]

                opt_exp = []
                for i, opt in enumerate(options):
                    opt_exp_i = []
                    for j, x in enumerate(opt):
                        ind = np.where((sampled_option_i==i) & (outcomes_i==x[0]))[0]
                        n = float(len(np.where(sampled_option_i==i)[0]))
                        opt_exp_i.append([x[0], len(ind)/n])
                    opt_exp.append(opt_exp_i)
                opt_exp = np.array(opt_exp)

                weights = np.array([cpt.pweight_prelec(option, pars) for i, option in enumerate(opt_exp)])
                wopt[:,:,1] = weights

                pH = cpt.choice_prob(wopt, pars)
                if np.random.random() < pH:
                    choice.append(1)
                else:
                    choice.append(0)


            choice = np.array(choice)
            p_resp = choice.mean()

            ss_A = samplesize[choice==0]
            ss_B = samplesize[choice==1]

            p_stop_A = np.bincount(ss_A, minlength=max_T)
            p_stop_A = p_stop_A/float(p_stop_A.sum())
            p_stop_B = np.bincount(ss_B, minlength=max_T)
            p_stop_B = p_stop_B/float(p_stop_B.sum())

            p_stop_cond = np.transpose([p_stop_A, p_stop_B])

            # only include data up to choice
            sampled_option = [sampled_option[i][:(samplesize[i]+1)] for i in range(samplesize.shape[0])]
            outcomes       = [outcomes[i][:(samplesize[i]+1)] for i in range(samplesize.shape[0])]
            outcome_ind    = [observed[i][:(samplesize[i]+1)] for i in range(samplesize.shape[0])]

            return {'choice': choice,
                    'samplesize': samplesize,
                    'p_resp': np.array([1-p_resp, p_resp]),
                    'p_stop_cond': p_stop_cond,
                    'sampled_option': sampled_option,
                    'outcomes': outcomes,
                    'outcome_ind': outcome_ind
                    }
Example #49
def create_random_data(filename, seconds, sample_rate, baseline=0.0, noise=None, event_rate=0, event_durations=None,
                       event_depths=None, overwrite=False):
    """
    Creates random sample data. Leaves the first 200 data points free of events.

    :param str filename: Filename for the data. If the file already exists and overwrite=False, an IOError is raised.
    :param float seconds: Number of seconds of data.
    :param float sample_rate: The sampling rate of the data, in Hz.
    :param float baseline: The baseline of the data, in uA.
    :param scipy.stats.distributions.rv_frozen noise: A frozen :mod:`scipy.stats` probability distribution\
            for the noise. An example normal distribution with mean 2 uA and std dev 3 uA is
            ::

                from scipy.stats import norm
                noise = norm(loc=2, scale=3)

            Default is no noise.
    :param float event_rate: Rate of events in Hz.
    :param scipy.stats.distributions.rv_frozen event_durations: A frozen :mod:`scipy.stats` probability distribution\
            for the event duration (in seconds).
    :param scipy.stats.distributions.rv_frozen event_depths: A frozen :mod:`scipy.stats` probability distribution\
            for the event depth, in uA.
    :param bool overwrite: Whether overwriting an existing file at filename is allowed. If False and filename exists,\
            an IOError will be raised.
    :raises: :py:exc:`IOError` - If the filename already exists and overwrite=False.

    >>> from pypore.sampledata.creator import create_random_data
    >>> from scipy import stats
    >>> seconds = 1. # 1 second of data.
    >>> sample_rate = 1.e6 # 1 MHz sample rate.
    >>> event_rate = 100. # 100 events/sec, on average.
    >>> baseline = 10. # 10 uA baseline.
    >>> noise = stats.norm(scale=.5) # Normal distributed noise with mean of 0 std dev of 0.5 uA.
    >>> event_depths = stats.norm(loc=2., scale=1.) # Normal distributed events with mean of 2 and std dev of 1 uA.
    >>> event_durations = stats.norm(loc=100.e-6, scale=10.e-6) # Normal distributed event durations with mean of 100 us and std dev 10 us.
    >>> n_events = create_random_data('random_trace.h5', seconds, sample_rate, baseline, noise, event_rate, event_durations, event_depths)
    """
    if not overwrite and os.path.exists(filename):
        raise IOError(
            "File already exists. Use a different filename, or call with overwrite=True to over-write existing file.")

    n_points = int(seconds * sample_rate)
    f = open_file(filename, mode='w', n_points=n_points, sample_rate=sample_rate)

    data = np.zeros(n_points) + baseline

    if noise is not None:
        data += noise.rvs(size=n_points)

    event_count = 0
    if event_rate > 0:
        i = 200
        mean_length = event_durations.mean() * sample_rate
        expected_events = seconds * event_rate
        # Available space that is not events or the beginning of the data.
        free_space = n_points - expected_events * mean_length - i
        event_probability = expected_events/free_space
        # Use geometric distribution to find the next starting spot of an event.
        rv = geom(event_probability)
        while i < n_points:
            # get next event start distance
            i += rv.rvs()
            event_count += 1
            event_length = event_durations.rvs() * sample_rate
            event_depth_i = event_depths.rvs()
            if i + event_length > n_points:
                event_length = n_points - i
            data[i:i+event_length] += event_depth_i
            i += event_length

    f.root.data[:] = data[:]

    f.close()

    return event_count