def chain(i, n, z, N, lam, p, q):
    """Simulate ``N`` steps of a birth-death chain on the states ``0..n``.

    Every step draws a uniform variate ``y`` and a hypergeometric variate
    ``x`` (``hypergeom.rvs(n, i, z)``: population ``n``, ``i`` marked items,
    ``z`` draws), evaluates the probability of moving up and of moving down
    from the current state, and then moves the state by +1, -1 or 0.
    The states ``0`` and ``n`` are absorbing: once reached, the state no
    longer changes.

    Args:
        i: Initial state.
        n: Largest state; also the population size of the hypergeometric draw.
        z: Number of draws of the hypergeometric variate.
        N: Number of steps to simulate.
        lam: Weight of the hypergeometric term in both transition
            probabilities; ``1 - lam`` weights the constant terms.
        p: Constant factor of the upward probability's second term.
        q: Constant factor of the downward probability's second term.

    Returns:
        A list of length ``N`` holding the state after each step.

    Note:
        ``hypergeom_pmf`` is a helper defined outside this block; presumably
        it evaluates the hypergeometric pmf -- verify its argument order
        against its definition.
    """
    trajectory = []
    for _ in range(N):
        # Both variates are drawn on every step, uniform first, even in an
        # absorbing state, so the sequence of random draws stays the same as
        # before this clean-up.
        y = np.random.uniform(0, 1)
        x = hypergeom.rvs(n, i, z)
        up = lam*(hypergeom_pmf(n, i, z, x))*(x/z)*((n-i)/n) + (1-lam)*p*((n-i)/n)
        down = lam*(hypergeom_pmf(n, n-i, z, z-x))*((z-x)/z)*(i/n) + (1-lam)*q*(i/n)
        if i != 0 and i != n:
            if y <= up:
                i = i + 1
            elif y <= down + up:
                # Reaching this branch already implies y > up.
                i = i - 1
        trajectory.append(i)
    return trajectory
Beispiel #2
0
def _exactly_sample(rdd, num: int, seed: int):
    """Draw exactly ``num`` elements from ``rdd``, spread over its partitions.

    The elements of every partition are counted first. The ``num`` samples
    are then allotted to the partitions one partition at a time: each
    partition receives a hypergeometric share of the samples that are still
    outstanding, drawn against the elements that are still unallotted. Both
    quantities are reduced after every partition, so the shares always add up
    to exactly ``num``.

    Args:
        rdd: Source RDD; must provide ``mapPartitionsWithIndex``.
        num: Total number of elements to sample.
        seed: Forwarded to ``_ReservoirSample``. The per-partition share draw
            itself uses scipy's default random state and is not seeded.

    Returns:
        The RDD produced by mapping ``_ReservoirSample(...).func`` over the
        partitions with ``preservesPartitioning=True``.

    Raises:
        ValueError: If ``num`` exceeds the number of elements in ``rdd``.
    """
    split_size = rdd.mapPartitionsWithIndex(
        lambda s, it: [(s, sum(1 for _ in it))]).collectAsMap()
    total = sum(split_size.values())

    if num > total:
        raise ValueError(
            f"not enough data to sample, own {total} but required {num}")

    # Randomly decide how many samples each split contributes.
    sampled_size = {}
    remaining_total = total
    remaining_num = num
    for split, size in split_size.items():
        if remaining_num == 0 or size == 0:
            # Nothing left to allot, or nothing to take from this split.
            # hypergeom.rvs is not called with zero draws, which it may
            # reject.
            taken = 0
        else:
            taken = int(hypergeom.rvs(
                M=remaining_total, n=size, N=remaining_num))
        sampled_size[split] = taken
        remaining_total -= size
        # Without this reduction the shares could add up to more than `num`,
        # and N could exceed the shrinking M (invalid parameters).
        remaining_num -= taken

    return rdd.mapPartitionsWithIndex(_ReservoirSample(
        split_sample_size=sampled_size, seed=seed).func,
                                      preservesPartitioning=True)
Beispiel #3
0
	def simulate_sketch(self,kmerSequenceLength,nMutated,sketchSize):
		"""Simulate the sketch intersection size for one mutated sequence.

		With sequence length L and N mutated kmers, the kmers of A union B
		are thought of as numbered 0..L+N-1:

		    0 .. L-1-N     unmutated (these are A intersection B)
		    L-N .. L-1     mutated, present in A
		    L .. L+N-1     mutated, present in B

		The hash function effectively picks a random set of s of the L+N
		kmers as the bottom sketch BS(A union B), s being the sketch size.
		This is an urn with L+N balls of which s are 'red'; L-N balls are
		drawn and the number of red ones among them is the size of the
		intersection of BS(A), BS(B) and BS(A union B).

		Raises ValueError unless 0 < sketchSize <= kmerSequenceLength.
		Returns 0 when every kmer is mutated, otherwise one hypergeometric
		variate drawn with the generator registered for this sketch size in
		self.sketchPrngs (or scipy's default when none is registered).
		"""
		sizeIsValid = (0 < sketchSize <= kmerSequenceLength)
		if not sizeIsValid:
			raise ValueError

		if sketchSize in self.sketchPrngs:
			prng = self.sketchPrngs[sketchSize]
		else:
			prng = None

		# No unmutated kmers means zero draws; hypergeom.rvs doesn't handle
		# that (perfectly legitimate) case, so answer it directly.
		if nMutated == kmerSequenceLength:
			return 0

		populationSize = kmerSequenceLength + nMutated
		numDrawn = kmerSequenceLength - nMutated
		return hypergeom.rvs(populationSize,sketchSize,numDrawn,random_state=prng)
from scipy.stats import hypergeom
import matplotlib.pyplot as plt

# Example: a collection of 20 animals contains 7 dogs and 12 of the 20
# animals are picked at random.  A frozen distribution gives the probability
# of each possible number of dogs in the pick; its probability mass function
# is plotted below.

M, n, N = 20, 7, 12
rv = hypergeom(M, n, N)
x = np.arange(n + 1)
pmf_dogs = rv.pmf(x)

# One marker per outcome, with a stem dropping down to the axis.
fig, ax = plt.subplots()
ax.plot(x, pmf_dogs, 'bo')
ax.vlines(x, 0, pmf_dogs, linewidth=2)
ax.set(xlabel='# of dogs in our group of chosen animals',
       ylabel='hypergeom PMF')
plt.show()

# The `hypergeom` methods can also be called directly, without freezing the
# distribution first.  The cumulative distribution function, for example:

prb = hypergeom.cdf(x, M, n, N)

# Random variates are generated the same way:

R = hypergeom.rvs(M, n, N, size=10)
# NOTE(review): `c2` is built outside this excerpt. If it comes from
# np.unique(..., return_counts=True) like `c3` below, position 3 is the
# outcome 3 only when the outcomes 0, 1 and 2 were all observed -- confirm.
f2 = c2[3]

print(
    "Probabilidad de que haya exactamente 3 cargamentos que contengan \nal menos un dispositivo defectuoso de entre los 20 seleccionados en 10000000 simulaciones es:",
    f2)

# n = 5, N = 40, k = 3, x = 1
print("-----------------------------------------------------------------")
print("Ejercicio 2)\n")

# Hypergeometric parameters in scipy's positional order (M, n, N).
Mvar = 40
nvar = 5
Nvar = 3
print("Con, M = 40, n = 5 y N = 3:")
variable = hypergeom.rvs(Mvar, nvar, Nvar, size=size)
# a3: distinct observed outcomes (sorted); b3: how often each one occurred.
a3, b3 = np.unique(variable, return_counts=True)

# Relative frequency of every observed outcome.
c3 = b3 / size
# Select the frequency of the outcome 1 by value rather than by position:
# np.unique only lists outcomes that actually occurred, so c3[1] is a
# different outcome (or an IndexError) whenever 0 is missing from the sample.
f3 = c3[a3 == 1].sum()

# NOTE: the message hard-codes the simulation count; the draw uses `size`.
print(
    "Probabilidad de que se encuentre exactamente un componente defectuoso\ncon 10000000 de simulaciones:",
    f3)

print("-----------------------------------------------------------------")
print("Ejercicio 3)\n")

print("Con lambda = 1:")
# Poisson variates with mean 1.
z = poisson.rvs(1, size=size)
Beispiel #6
0
import matplotlib.pyplot as plt
from scipy.stats import hypergeom, rv_discrete
import numpy as np
numargs = hypergeom.numargs

# Shape parameters in scipy's positional order (M, n, N).  They are bound to
# names because the cdf/ppf and rvs sections further down refer to them.
# Another parameter set that was tried: [M, n, N] = [100, 10, -1]
# NOTE(review): n = 20 exceeds M = 10, which is outside the distribution's
# valid parameter range -- presumably a deliberate probe; confirm.
M, n, N = 10, 20, 3

# Display frozen pmf:

rv = hypergeom(M, n, N)
# print() call form, valid on Python 3 (the statement form is a SyntaxError).
print(rv.dist.b)
# Built-in min() compares the two values; np.min(value, 3) would treat 3 as
# an axis number and fail on a scalar.
x = np.arange(0, min(rv.dist.b, 3) + 1)
h = plt.plot(x, rv.pmf(x))
# Deliberate early stop: nothing below this line runs while it is in place.
exit()

# Check accuracy of cdf and ppf:

prb = hypergeom.cdf(x, M, n, N)
h = plt.semilogy(np.abs(x - hypergeom.ppf(prb, M, n, N)) + 1e-20)

# Random number generation:

R = hypergeom.rvs(M, n, N, size=100)

# Custom made discrete distribution:

vals = [np.arange(7), (0.1, 0.2, 0.3, 0.1, 0.1, 0.1, 0.1)]
custm = rv_discrete(name='custm', values=vals)
h = plt.plot(vals[0], custm.pmf(vals[0]))

Beispiel #7
0
# Dashed vertical markers at the first quartile, the median and the third
# quartile.
for _marker in (q1, median, q3):
    ax.axvline(x=_marker, linewidth=3, alpha=0.6, color='black', linestyle='dashed')

horiz_text_offset = 0.4
vert_text_offset = 0.1

plt.xlim(0, 21)

# Label the four regions between consecutive boundaries; each label sits at
# the midpoint of its region, shifted left by the horizontal offset.
_bounds = (x[0], q1, median, q3, x[-1])
for _lo, _hi, _label in zip(_bounds[:-1], _bounds[1:], ('Q1', 'Q2', 'Q3', 'Q4')):
    plt.text(_lo + (_hi - _lo) / 2.0 - horiz_text_offset, vert_text_offset,
             _label, color='black', size='x-large')

# Random samples
samp_size = 100
pts = hypergeom.rvs(M, n, N, size=samp_size)

# Histogram of the samples; equal weights of 1/len(pts) turn the bar heights
# into relative frequencies.
ys = [.005] * samp_size
_rel_weights = np.ones_like(pts) / float(len(pts))
plt.hist(
    pts,
    bins=10,
    facecolor='purple',
    alpha=0.45,
    weights=_rel_weights,
    density=False,
    edgecolor='black',
    linewidth=0.5,
)
# One 'x' per sample at a fixed small height.
plt.plot(pts, ys, 'bx')

plt.show()

# Sample statistics
std_sample, var_sample, mean_sample = np.std(pts), np.var(pts), np.mean(pts)
q1_sample, median_sample, q3_sample = np.percentile(pts, [25, 50, 75])
# Shrink the major tick labels on both axes.  Axes.tick_params is used
# because the per-tick `Tick.label` attribute is no longer available in
# current Matplotlib releases.
ax.tick_params(axis='both', which='major', labelsize=5)
fig.suptitle('Distribucion de Poisson')
plt.show()

# HYPERGEOMETRIC DISTRIBUTION

from scipy.stats import hypergeom

# Notebook-style evaluations for a population of M = 25 with n = 15 marked
# items and N = 3 draws; the results are not stored.
hypergeom.pmf(1, M=15 + 10, n=15, N=3)       # P(X = 1)
hypergeom.cdf(1, M=15 + 10, n=15, N=3)       # P(X <= 1)
1 - hypergeom.cdf(1, M=15 + 10, n=15, N=3)   # P(X > 1)

# 100 random variates from the same distribution.
hypergeom.rvs(M=15 + 10, n=15, N=3, size=100)

[M, n, N] = [20, 7, 12]
# Full support of the distribution: max(0, N - M + n) .. min(n, N), both ends
# included.  np.arange excludes its stop value, hence the + 1.
x = np.arange(max(0, N - M + n), min(n, N) + 1)
pmf_values = hypergeom.pmf(x, M, n, N)
cdf_values = hypergeom.cdf(x, M, n, N)

fig = plt.figure(figsize=(5, 2.7))

# Left panel: probability mass function.
ax = fig.add_subplot(1, 2, 1)
ax.plot(x, pmf_values, 'bo', ms=5, label='hypergeom pmf')
ax.vlines(x, 0, pmf_values, colors='b', lw=2, alpha=0.5)
# Leave 10% headroom above the tallest bar.
ax.set_ylim([0, max(pmf_values) * 1.1])
# Small major tick labels on both axes (Axes.tick_params replaces the
# per-tick `Tick.label` attribute, which current Matplotlib no longer has).
ax.tick_params(axis='both', which='major', labelsize=5)

# Right panel: cumulative distribution function.
ax = fig.add_subplot(1, 2, 2)
ax.plot(x, cdf_values, 'bo', ms=5, label='hypergeom cdf')
ax.vlines(x, 0, cdf_values, colors='b', lw=2, alpha=0.5)