Code Example #1
    def sample(self, m, components=None):
        """
        Draws m samples from the current finite mixture distribution.

        :param m: Number of samples to draw.
        :type m: int
        :param components: Optional array that, if given, receives the index
            of the mixture component each sample was drawn from.
        :rtype: natter.DataModule.Data
        :returns: A Data object containing the samples
        """
        dim = self['P'][0].sample(1).dim()
        nc = multinomial(m, self.param['alpha'])  # number of samples per component
        mrange = list(range(m))                   # shuffled sample slots (a list, so shuffle works in Python 3)
        shuffle(mrange)
        X = zeros((dim,m))
        ind = 0
        K = len(self['P'])
        for k in range(K):
            dat = self.param['P'][k].sample(nc[k])
            X[:,mrange[ind:ind + nc[k]]] = dat.X
            if components is not None:
                components[mrange[ind:ind + nc[k]]] = k
            ind += nc[k]
        return Data(X,"%i samples from a %i-dimensional finite mixture distribution" % (m,dim))
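
The same scheme works on its own: draw per-component counts from a multinomial, shuffle the sample slots, then fill each component's slice. A minimal NumPy sketch with a hypothetical two-component 1-D Gaussian mixture (the weights and parameters are made up for illustration):

import numpy as np

m = 1000
alpha = [0.3, 0.7]                    # hypothetical mixture weights
nc = np.random.multinomial(m, alpha)  # samples to draw from each component
order = np.random.permutation(m)      # shuffled sample slots
X = np.zeros((1, m))
ind = 0
for k, (mu, sigma) in enumerate([(0.0, 1.0), (5.0, 0.5)]):
    X[:, order[ind:ind + nc[k]]] = np.random.normal(mu, sigma, nc[k])
    ind += nc[k]
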
Code Example #2
File: stochastic.py  Project: Thopiax/pydemic
    def _diff(self, dt: float):
        state_diff = {comp: 0 for comp in self.model.compartments}

        for (src, edges) in self.model.transition_rates.items():
            if len(edges) == 1:
                [(dest, rate)] = edges

                flow = binomial(self.model[src], rate * dt)

                # remove flow from src component
                state_diff[src] -= flow
                # add flow to dest component
                state_diff[dest] += flow
            else:
                # Only the exposed compartment "E" has multiple outgoing edges.
                assert src == "E"

                # Append a final "catch-all" entry so the last outcome is the
                # probability of staying exposed.
                infectious_rates = np.array([rate
                                             for (_, rate) in edges] + [0])

                flow = multinomial(self.model[src], infectious_rates * dt)

                state_diff[src] -= sum(flow[:-1])

                for index, (dest, _) in enumerate(edges):
                    state_diff[dest] += flow[index]

        return state_diff
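
The multinomial "catch-all" above is worth seeing in isolation: with np.random.multinomial, the last pvals entry is treated as the leftover probability (1 minus the sum of the others), so appending 0 makes "stay exposed" absorb whatever mass the transition rates do not claim. The counts and rates below are made up:

import numpy as np

dt = 0.1
E = 1000                       # hypothetical exposed count
rates = np.array([0.2, 0.05])  # hypothetical E -> I1, E -> I2 rates
flow = np.random.multinomial(E, np.append(rates * dt, 0.0))
print(flow)                    # e.g. [19  6 975]; flow[-1] stays in E
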
Code Example #3
from numpy.random import dirichlet, multinomial, randint

def generate(num_seq, seq_length, alphabet, m_word_length, m_word_param, background_param):
	magic_thetas = [dirichlet(m_word_param) for j in range(m_word_length)]
	background_theta = dirichlet(background_param)
	sequences = []
	starts = []
	for k in range(num_seq):
		background_onehots = [multinomial(1, background_theta) for x in range(seq_length - m_word_length)]
		background = [alphabet[t] for t in [i.tolist().index(1) for i in background_onehots]]
		#background = [alphabet[t].lower() for t in [i.tolist().index(1) for i in background_onehots]]
		magic_onehots = [multinomial(1, theta) for theta in magic_thetas]
		magic_word = [alphabet[j] for j in [i.tolist().index(1) for i in magic_onehots]]
		start_pos = randint(seq_length - m_word_length)
		background[start_pos : start_pos] = magic_word  # splice the magic word in (list insert, not overwrite)
		sequences.append(background)
		starts.append(start_pos)
	return [starts, sequences]
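
A hypothetical call, assuming the function and its numpy.random imports above are in scope: plant an 8-symbol magic word in each of five length-30 sequences over a DNA alphabet, with symmetric Dirichlet priors:

starts, sequences = generate(5, 30, list("ACGT"), 8,
                             [1.0] * 4, [1.0] * 4)
print(starts)                  # where each magic word was inserted
print("".join(sequences[0]))   # first generated sequence, length 30
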
Code Example #4
def multinomial(trials, probs, shape=[]):
    """multinomial(trials, probs) or multinomial(trials, probs, [n, m, ...]) returns
           an array of multinomially distributed integer vectors.

           trials is the number of trials in each multinomial distribution.
           probs is a one-dimensional array. There are len(probs)+1 events.
           probs[i] is the probability of the i-th event, 0 <= i < len(probs).
           The probability of event len(probs) is 1. - np.sum(probs).

       The first form returns a single 1-D array containing one multinomially
           distributed vector.

           The second form returns an array of shape (n, m, ..., len(probs)).
           In this case, output[i,j,...,:] is a multinomially distributed
           integer vector."""
    if shape == []:
        shape = None
    return mt.multinomial(trials, probs, shape)
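
For comparison (this is NumPy's own API, not the wrapper above): np.random.multinomial takes the full probability vector and returns one count per event:

import numpy as np

counts = np.random.multinomial(100, [0.2, 0.3, 0.5])
print(counts, counts.sum())    # three counts summing to 100
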
Code Example #5
File: core_methods.py  Project: liuchao-nj/SparCC3
def draw(x):
    # n and multinomial (numpy.random) are captured from the enclosing scope.
    p = x / float(x.sum())        # normalize counts to probabilities
    f = 1. * multinomial(n, p)    # resample n counts, cast to float
    return f
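
A self-contained sketch of a plausible enclosing scope for draw(), with a made-up resampling depth n and count profile x:

import numpy as np
from numpy.random import multinomial

n = 500
x = np.array([120., 30., 250., 100.])

def draw(x):
    p = x / float(x.sum())         # normalize counts to probabilities
    return 1. * multinomial(n, p)  # one resample at depth n

print(draw(x), draw(x).sum())      # each resample sums to n
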
Code Example #6
from math import exp, lgamma, log
from numpy.random import multinomial, randint

def gibbssample(num_iters, pos_sequences, alphabet, m_word_length, m_word_param, background_param):
	#print sequences
	sequences = pos_sequences[1]
	M = len(alphabet) # Alphabet size
	K = len(sequences) # Num sequences
	N = len(sequences[0]) # Seq length
	alph_map = {alphabet[m] : m for m in range(M)}
	iter_logpost = [[], []]  # [iteration indices, log posteriors]

	# Initialize hidden word starting locations
	R = randint(0, N - m_word_length, K).tolist()

	# Calculate sum of alphas for magic word and background distributions
	A = float(sum(m_word_param))
	A_back = float(sum(background_param))

	# Get magic word and background symbol counts
	N_m = [[0.0] * m_word_length for x in range(M)]
	bg_m = [0.0] * M
	for i in range(K):
		for x in range(N):
			if x >= R[i] and x < R[i] + m_word_length:
				N_m[alph_map[sequences[i][x].upper()]][x-R[i]] += 1
			else:
				bg_m[alph_map[sequences[i][x].upper()]] += 1

	# Begin iterations
	for l in range(num_iters):
		to_exclude = list(range(K))  # must be a list: entries are deleted below
		for s in range(K):
			# Select sequence to exclude
			exclude_indx = randint(len(to_exclude))
			z = to_exclude[exclude_indx]
			del to_exclude[exclude_indx]

			# Update counts for excluding excluded sequence
			for x in range(N):
				if x >= R[z] and x < R[z] + m_word_length:
					N_m[alph_map[sequences[z][x].upper()]][x-R[z]] -= 1
				else:
					bg_m[alph_map[sequences[z][x].upper()]] -= 1

			P = [[0.0] * m_word_length for x in range(M)]
			P_bg = [0.0] * M

			# Calculate log conditional P(s_(z,j) = m | s_(j,-z), alpha) for each symbol and m_word pos
			for m in range(M):
				P_bg[m] = log((bg_m[m] + background_param[m]) / (A_back + K*(N-m_word_length) - 1))
				for j in range(m_word_length):
					P[m][j] = log((N_m[m][j] + m_word_param[m]) / (A + K - 1))

			# Calculate posterior over each starting position in s_z
			r_bg_z = sum([P_bg[alph_map[m.upper()]] for m in sequences[z]])
			r = [r_bg_z] * (N - m_word_length)

			for s_pos in range(N - m_word_length):
				for j in range(m_word_length):
					idx = s_pos + j
					r[s_pos] -= P_bg[alph_map[sequences[z][idx].upper()]]
					r[s_pos] += P[alph_map[sequences[z][idx].upper()]][j]

			# Normalize conditionals
			probs = [exp(x) for x in r]
			normalizer = sum(probs)
			probs = [x/normalizer for x in probs]

			# Update starting position for s_z
			R[z] = multinomial(1,probs).tolist().index(1)

			# Update counts for updating starting position of excluded sequence
			for x in range(N):
				if x >= R[z] and x < R[z] + m_word_length:
					N_m[alph_map[sequences[z][x].upper()]][x-R[z]] += 1
				else:
					bg_m[alph_map[sequences[z][x].upper()]] += 1

		# Calculate posterior
		log_post = lgamma(A_back) - lgamma(K*(N - m_word_length) + A_back)
		for m in range(M):
			log_post += lgamma(bg_m[m] + background_param[m]) - lgamma(background_param[m])
		for j in range(m_word_length):
			log_post += lgamma(A) - lgamma(K + A)
			for m in range(M):
				log_post += lgamma(N_m[m][j] + m_word_param[m]) - lgamma(m_word_param[m])

		iter_logpost[0].append(l)
		iter_logpost[1].append(log_post)

	return [R, iter_logpost]
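
A hypothetical end-to-end run, assuming generate() from Code Example #3 is in scope: plant the magic words, then let the sampler recover their start positions:

data = generate(10, 40, list("ACGT"), 8, [0.8] * 4, [3.0] * 4)
R, iter_logpost = gibbssample(500, data, list("ACGT"), 8,
                              [0.8] * 4, [3.0] * 4)
print(data[0])  # true start positions
print(R)        # sampled start positions after the final iteration
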
Code Example #7
          " ".join(["%.3g" % xx for xx in clusterPrior]))

    print("means:", file=sys.stderr)
    for row in means:
        print(" ".join(["%.3g" % xx for xx in row]), file=sys.stderr)

    if vvar == 0:
        print("fixed variance %.3g" % sigma, file=sys.stderr)
    else:
        print("variances:", file=sys.stderr)
        for row in variances:
            print(" ".join(["%.3g" % xx for xx in row]), file=sys.stderr)
        print("noise variances:", file=sys.stderr)
        print(" ".join(["%.3g" % xx for xx in noiseVariances]), file=sys.stderr)

    clusterSizes = multinomial(trainNum, clusterPrior)

    print("training cluster sizes:",
          " ".join([str(xx) for xx in clusterSizes]), file=sys.stderr)

    for label in labels(trainNum, clusterSizes):
        print(
            label, " ".join([
                str(xx) for xx in features(label, means, variances, noiseMeans,
                                           noiseVariances)
            ]))

    clusterSizes = multinomial(num, clusterPrior)
    print(clusterSizes)
    print("cluster sizes:",
          " ".join([str(xx) for xx in clusterSizes]), file=sys.stderr)
Code Example #8
    #
    allProbs = np.exp(allProbs)
    return allProbs / np.sum(allProbs)


# *** Gibbs sampling 200x
for num_iter in range(200):
    print(num_iter)
    # update c
    logPi = np.log(pi)

    probs = x.map(lambda x_i: getProbs(False, log_mu, x_i, logPi))
    # Now assign each document to a category

    c = probs.map(lambda prob: np.nonzero(multinomial(1, prob))[0][0])
    # *** c is the assignment of each doc to a category

    # update pi
    count = dict(c.map(lambda cat: (cat, 1)).reduceByKey(add).takeOrdered(
        20))  # *** this is 'a' (vector of size 20) in the PDF

    # Now, we update the alpha
    new_alpha = [0] * 20
    for i in range(20):
        if i in count:
            new_alpha[i] = alpha[i] + count[i]  # *** count[i], where i is the key
        else:
            new_alpha[i] = alpha[i]
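
The assignment step uses a small trick worth isolating: a single multinomial trial yields a one-hot vector, and np.nonzero(...)[0][0] converts it to a category index. A standalone sketch with an assumed probability vector:

import numpy as np
from numpy.random import multinomial

prob = np.array([0.1, 0.7, 0.2])
c_i = np.nonzero(multinomial(1, prob))[0][0]
print(c_i)  # 0, 1, or 2, drawn with the given probabilities
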
Code Example #9
def assignCategory(x, i, c):
    if x[1] == i:
        return c
    else:
        return x[0]


for num_iter in range(200):
    print(num_iter)
    # update c
    logPi = np.log(pi)
    probs = x.map(lambda x_i: getProbs(False, log_mu, x_i, logPi)).collect()
    # print(len(probs)) # 19997
    # Now assign each document to a category

    c_local = [np.nonzero(multinomial(1, prob))[0][0] for prob in probs]
    # print(len(c_local)) # 19997

    c = x.zipWithIndex().map(lambda tup: c_local[tup[1]])
    #make it eager

    # update pi
    count = c.map(lambda cat: (cat, 1)).reduceByKey(add).sortByKey(
        ascending=True).collectAsMap()

    # Now, we update the alpha
    new_alpha = [0] * 20
    for i in range(20):
        if i in count:
            new_alpha[i] = alpha[i] + count[i]
        else:
Code Example #10
    def __call__(self, nothing=None):
        # Draw a random sample size, then return the multinomial counts
        # normalized back into an empirical probability vector.
        nsample = randint(1, self.Msamples)
        fnsample = float(nsample)
        sample = multinomial(nsample, self.pvector) / fnsample
        return sample
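
A self-contained sketch of what __call__ computes, with made-up values standing in for self.Msamples and self.pvector: a noisy empirical version of the probability vector, estimated from a random-sized multinomial sample:

from numpy.random import multinomial, randint
import numpy as np

Msamples = 100
pvector = np.array([0.25, 0.25, 0.5])
nsample = randint(1, Msamples)                           # random sample size
sample = multinomial(nsample, pvector) / float(nsample)  # empirical frequencies
print(sample, sample.sum())                              # sums to 1.0
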