def testem(self):
    # Smoke test: runs EM on a two-component mixture whose components each
    # combine a normal, a multinomial and an HMM part. The method contains
    # no assertions; it only has to run through without raising.
    # Reads the fixtures self.gen, self.h1, self.h2, self.m and self.DIAG.

    # --- build the complex DataSet (scalar features + HMM sequences) ---
    # Sampling order is kept as-is so random draws are consumed identically.
    scalar_samples = self.gen.sampleSet(100)
    sequences = self.h1.hmm.sample(40, 10)
    more_sequences = self.h2.hmm.sample(60, 10)
    sequences.merge(more_sequences)

    data = mixtureHMM.SequenceDataSet()
    data.fromGHMM(scalar_samples, [sequences])
    data.internalInit(self.m)

    def discrete_hmm(transitions, emissions, initial):
        # Three-state HMM over IntegerRange(0, 4), built via mixtureHMM.getHMM.
        return mixtureHMM.getHMM(
            mixtureHMM.ghmm.IntegerRange(0, 4),
            mixtureHMM.ghmm.DiscreteDistribution(
                mixtureHMM.ghmm.IntegerRange(0, 4)),
            transitions,
            emissions,
            initial)

    # --- starting parameters for the model to be trained ---
    start_hmm1 = discrete_hmm(
        [[0.5, 0.2, 0.3],
         [0.2, 0.3, 0.5],
         [0.1, 0.5, 0.4]],
        [[0.2, 0.4, 0.1, 0.3],
         [0.5, 0.1, 0.2, 0.2],
         [0.4, 0.3, 0.15, 0.15]],
        [0.3, 0.3, 0.4])
    start_hmm2 = discrete_hmm(
        [[0.5, 0.4, 0.1],
         [0.3, 0.2, 0.5],
         [0.3, 0.2, 0.5]],
        [[0.1, 0.1, 0.4, 0.4],
         [0.1, 0.1, 0.4, 0.4],
         [0.2, 0.1, 0.6, 0.1]],
        [0.3, 0.4, 0.3])

    start_normal1 = NormalDistribution(-1.5, 1.5)
    start_normal2 = NormalDistribution(9.0, 1.2)
    start_mult1 = MultinomialDistribution(
        3, 4, [0.1, 0.1, 0.55, 0.25], alphabet=self.DIAG)
    start_mult2 = MultinomialDistribution(
        3, 4, [0.4, 0.3, 0.1, 0.2], alphabet=self.DIAG)

    component1 = ProductDistribution([start_normal1, start_mult1, start_hmm1])
    component2 = ProductDistribution([start_normal2, start_mult2, start_hmm2])

    trained = MixtureModel(2, [0.7, 0.3], [component1, component2])
    trained.EM(data, 80, 0.1, silent=1)
def setUp(self):
    # Builds the generating models used by the tests and stores them on the
    # instance: self.DIAG (alphabet), self.h1 / self.h2 (HMMs), self.m (full
    # mixture including the HMM parts) and self.gen (mixture without the HMM
    # parts, used for sampling).
    self.DIAG = Alphabet(['.', '0', '8', '1'])

    def discrete_hmm(transitions, emissions, initial):
        # Three-state HMM over IntegerRange(0, 4), built via mixtureHMM.getHMM.
        return mixtureHMM.getHMM(
            mixtureHMM.ghmm.IntegerRange(0, 4),
            mixtureHMM.ghmm.DiscreteDistribution(
                mixtureHMM.ghmm.IntegerRange(0, 4)),
            transitions,
            emissions,
            initial)

    self.h1 = discrete_hmm(
        [[0.3, 0.6, 0.1],
         [0.0, 0.5, 0.5],
         [0.4, 0.2, 0.4]],
        [[0.5, 0.2, 0.1, 0.2],
         [0.5, 0.4, 0.05, 0.05],
         [0.8, 0.1, 0.05, 0.05]],
        [1.0, 0.0, 0.0])

    # NOTE(review): the second emission row below sums to 1.1, not 1.0.
    # Left untouched because the intended value is not evident -- confirm.
    self.h2 = discrete_hmm(
        [[0.5, 0.4, 0.1],
         [0.3, 0.2, 0.5],
         [0.3, 0.2, 0.5]],
        [[0.1, 0.1, 0.4, 0.4],
         [0.1, 0.1, 0.4, 0.5],
         [0.2, 0.2, 0.3, 0.3]],
        [0.6, 0.4, 0.0])

    normal_a = NormalDistribution(2.5, 0.5)
    normal_b = NormalDistribution(6.0, 0.8)
    mult_a = MultinomialDistribution(
        3, 4, [0.23, 0.26, 0.26, 0.25], alphabet=self.DIAG)
    mult_b = MultinomialDistribution(
        3, 4, [0.7, 0.1, 0.1, 0.1], alphabet=self.DIAG)

    # The same weight list and the same normal/multinomial objects are
    # shared by both mixtures, exactly as before.
    weights = [0.4, 0.6]

    full_a = ProductDistribution([normal_a, mult_a, self.h1])
    full_b = ProductDistribution([normal_b, mult_b, self.h2])
    self.m = MixtureModel(2, weights, [full_a, full_b])

    # mixture for sampling (no HMM part)
    sampling_a = ProductDistribution([normal_a, mult_a])
    sampling_b = ProductDistribution([normal_b, mult_b])
    self.gen = MixtureModel(2, weights, [sampling_a, sampling_b])
def parseFile(fileHandle):
    """
    Internal function. Parses flat files.

    Reads one line from `fileHandle`, splits it on ';' and uses the second
    field as a keyword that selects which object to construct. The remaining
    fields are converted with int / float / simple_eval and passed to the
    matching constructor. For the composite keywords ("Mix", "Prod",
    "MixPrior", "DirichMixPrior") the file handle is handed on to the
    corresponding parse* helper.

    fileHandle -- object providing readline()

    Returns the object built for the keyword found on the line.

    Raises:
        TypeError  -- the keyword is not one of the supported ones.
        ValueError -- the line does not have exactly the number of fields the
                      keyword's branch unpacks (or a numeric field does not
                      convert).
        IndexError -- the line has fewer than two ';'-separated fields.
    """
    fields = chomp(fileHandle.readline()).split(';')
    keyword = fields[1]

    # Each branch unpacks the full field list, which doubles as a check that
    # the line has exactly the expected number of fields. The first field
    # (and, except for "Mix", the keyword field) is not used further.
    if keyword == "Mix":
        _, head, G, pi, compFix = fields
        return parseMix(fileHandle, head, int(G), simple_eval(pi), simple_eval(compFix))

    elif keyword == "Norm":
        from pymix.distributions.normal import NormalDistribution
        _, _, mu, sigma = fields
        return NormalDistribution(float(mu), float(sigma))

    elif keyword == "Exp":
        from pymix.distributions.exponential import ExponentialDistribution
        _, _, lambd = fields
        return ExponentialDistribution(float(lambd))

    elif keyword == "Mult":
        from pymix.distributions.multinomial import MultinomialDistribution
        _, _, N, M, phi, alphabet, parFix = fields
        alph = Alphabet(simple_eval(alphabet))
        return MultinomialDistribution(int(N), int(M), simple_eval(phi), alph, simple_eval(parFix))

    elif keyword == "Discrete":
        from pymix.distributions.discrete import DiscreteDistribution
        _, _, M, phi, alphabet, parFix = fields
        alph = Alphabet(simple_eval(alphabet))
        return DiscreteDistribution(int(M), simple_eval(phi), alph, simple_eval(parFix))

    elif keyword == "MultiNormal":
        from pymix.distributions.multinormal import MultiNormalDistribution
        _, _, p, mu, sigma = fields
        # XXX the tokenize package used in simple_eval cannot deal with negative values in
        # mu or sigma. A hack solution to that would be to change simple_eval to a direct
        # call to eval in the line below. This carries all the usual implications for security.
        return MultiNormalDistribution(int(p), simple_eval(mu), simple_eval(sigma))

    elif keyword == "Dirichlet":
        from pymix.distributions.dirichlet import DirichletDistribution
        _, _, M, alpha = fields
        return DirichletDistribution(int(M), simple_eval(alpha))

    elif keyword == "DirichletPr":
        from pymix.priors.dirichlet import DirichletPrior
        _, _, M, alpha = fields
        return DirichletPrior(int(M), simple_eval(alpha))

    elif keyword == "NormalGamma":
        from examples.crp import NormalGammaPrior
        _, _, mu, kappa, dof, scale = fields
        return NormalGammaPrior(float(mu), float(kappa), float(dof), float(scale))

    # elif keyword == "PriorForDirichlet":
    #     [offset, head, M, eta] = fields
    #     return PriorForDirichletDistribution(int(M), simple_eval(eta))

    elif keyword == "Prod":
        _, _, p = fields
        return parseProd(fileHandle, int(p))

    elif keyword == "MixPrior":
        # ;MixPrior;4;0.7;0.7
        _, _, nr_dist, structPrior, nrCompPrior = fields
        return parseMixPrior(fileHandle, int(nr_dist), float(structPrior), float(nrCompPrior))

    elif keyword == "DirichMixPrior":
        # ;DirichMixPrior;3;5;[ 0.3 0.3 0.4]
        _, _, G, M, pi = fields
        return parseDirichletMixPrior(fileHandle, int(G), int(M), simple_eval(pi))

    else:
        # Call form instead of the Python-2-only "raise E, msg" statement;
        # this spelling is valid on Python 2 and Python 3 alike.
        raise TypeError("Unknown keyword: " + str(keyword))
# Fill the three parameter vectors with N random weights each. The draws are
# interleaved (p, p2, p5 per iteration), so the loop is kept as one.
for i in range(N):
    p.append(random())
    p2.append(random())
    p5.append(random())


def _normalized(weights):
    """Return a new list with `weights` rescaled so that they sum to one."""
    # The total is computed once. The former `p = map(lambda x: x/sum(p), p)`
    # re-summed the list for every element and, with a lazy map(), made the
    # lambda see the rebound name instead of the original list.
    total = sum(weights)
    return [w / total for w in weights]


p = _normalized(p)
p2 = _normalized(p2)
p5 = _normalized(p5)

# Three-component mixture of multinomials to sample from.
multi = MultinomialDistribution(80, N, p)
multi2 = MultinomialDistribution(80, N, p2)
multi5 = MultinomialDistribution(80, N, p5)

mix = MixtureModel(3, [0.5, 0.25, 0.25], [multi, multi2, multi5])
print(mix)

[true, s] = mix.labelled_sample(1000)

# Raw (not yet normalized) random weights for a second set of vectors.
p3 = []
p4 = []
p6 = []
for i in range(N):
    p3.append(random())
    p4.append(random())
    p6.append(random())
# NOTE(review): the three appends below look like the tail of a loop whose
# header (and presumably the matching p1.append) lies above this excerpt --
# keep them at that loop body's indentation when merging.
p2.append(random.random())
p3.append(random.random())
p4.append(random.random())


def _normalized(weights):
    """Return a new list with `weights` rescaled so that they sum to one."""
    # The total is computed once. The former `p1 = map(lambda x: x/sum(p1), p1)`
    # re-summed the list for every element and, with a lazy map(), made the
    # lambda see the rebound name instead of the original list.
    total = sum(weights)
    return [w / total for w in weights]


p1 = _normalized(p1)
p2 = _normalized(p2)
p3 = _normalized(p3)
p4 = _normalized(p4)

# First component.
mult = MultinomialDistribution(6, 25, p1, SNP)
mult2 = MultinomialDistribution(7, 25, p2, SNP)
phi = NormalDistribution(11.0, 4.0)
phi2 = NormalDistribution(11.0, 6.0)
pd1 = ProductDistribution([mult, mult2, phi, phi2])

# Second component. It is now built from its own distributions; previously
# pd2 reused mult, mult2, phi and phi2, so both components were made of the
# very same objects and mult3/mult4/phi3/phi4 were never used.
mult3 = MultinomialDistribution(6, 25, p3, SNP)
mult4 = MultinomialDistribution(7, 25, p4, SNP)
phi3 = NormalDistribution(8.0, 5.0)
phi4 = NormalDistribution(15.0, 5.0)
pd2 = ProductDistribution([mult3, mult4, phi3, phi4])

m = MixtureModel(2, [0.5, 0.5], [pd1, pd2])
m.EM(d, 15, 0.05)
from random import random
from pymix.util.times.timer import Timer
from pymix.distributions.multinomial import MultinomialDistribution
from pymix.distributions.product import ProductDistribution
from pymix.models.mixture import MixtureModel


def _normalized(weights):
    """Return a new list with `weights` rescaled so that they sum to one."""
    # The total is computed once. The former `par = map(lambda x: x/sum(par), par)`
    # re-summed the list for every element and, with a lazy map(), made the
    # lambda see the rebound name instead of the original list.
    total = sum(weights)
    return [w / total for w in weights]


# Generating model: 3 components, each a product of 10 multinomials with
# randomly drawn, normalized parameters.
pdList = []
for j in range(3):
    dList = []
    for i in range(10):
        par = _normalized([random(), random(), random(), random(), random(), random()])
        dList.append(MultinomialDistribution(6, 6, par))
    pdList.append(ProductDistribution(dList))

piList = _normalized([random(), random(), random()])

mix = MixtureModel(3, piList, pdList)
dat = mix.sampleDataSet(1000)

# Second parameter set; the rest of the inner loop body lies beyond this
# excerpt, so par2 is left exactly as drawn here.
pdList2 = []
for j in range(3):
    dList2 = []
    for i in range(10):
        par2 = [random(), random(), random(), random(), random(), random()]
# Disabled debugging aid:
#   seq = h1.hmm.sample(10,50)
#   print seq

# Second HMM over IntegerRange(0, 4).
A2 = [
    [0.5, 0.4, 0.1],
    [0.3, 0.2, 0.5],
    [0.3, 0.2, 0.5],
]
# NOTE(review): the second row of B2 sums to 1.1, not 1.0. Left untouched
# because the intended value is not evident -- confirm.
B2 = [
    [0.1, 0.1, 0.4, 0.4],
    [0.1, 0.1, 0.4, 0.5],
    [0.2, 0.2, 0.3, 0.3],
]
pi2 = [0.6, 0.4, 0.0]
h2 = mixtureHMM.getHMM(
    mixtureHMM.ghmm.IntegerRange(0, 4),
    mixtureHMM.ghmm.DiscreteDistribution(mixtureHMM.ghmm.IntegerRange(0, 4)),
    A2,
    B2,
    pi2,
)

# Scalar and multinomial parts of the two components.
n1 = NormalDistribution(2.5, 0.5)
n2 = NormalDistribution(6.0, 0.8)

mult1 = MultinomialDistribution(3, 4, [0.23, 0.26, 0.26, 0.25], alphabet=DIAG)
mult2 = MultinomialDistribution(3, 4, [0.7, 0.1, 0.1, 0.1], alphabet=DIAG)

# Full components: normal + multinomial + HMM.
c1 = ProductDistribution([n1, mult1, h1])
c2 = ProductDistribution([n2, mult2, h2])

mpi = [0.4, 0.6]
m = MixtureModel(2, mpi, [c1, c2])

# Disabled debugging aids:
#   print m
#   print "-->",m.components[0].suff_dataRange

# ----------- constructing complex DataSet ----------------

# mixture for sampling
gc1 = ProductDistribution([n1, mult1])
# Single-component mixture around a normal distribution.
m6 = MixtureModel(1, [1.0], [NormalDistribution(-1.5, 2.5)])

# Disabled experiments, kept for reference:
#   m6.EM(seq4,1,5)
#   print m6
#   seq5 = numarray.zeros(900,numarray.Float)
#   for i in range(900):
#       seq5[i] = random.normalvariate(0.0,0.5)
#   print "var = ", variance(seq5)

# ----------------------------- Example 5 -----------------------------
# A mixture whose components are themselves one-component mixtures of
# multinomials; data is sampled from it ...
mc1 = MixtureModel(
    1, [1.0], [MultinomialDistribution(6, 3, [0.0, 0.25, 0.75])])
mc2 = MixtureModel(
    1, [1.0], [MultinomialDistribution(6, 3, [0.5, 0.3, 0.2])])

m7 = MixtureModel(2, [0.5, 0.5], [mc1, mc2])
seq6 = m7.sampleSet(150)

# ... and a second mixture of the same shape, with different starting
# parameters, is run through EM on that data.
mc3 = MixtureModel(
    1, [1.0], [MultinomialDistribution(6, 3, [0.4, 0.5, 0.1])])
mc4 = MixtureModel(
    1, [1.0], [MultinomialDistribution(6, 3, [0.2, 0.1, 0.7])])

m8 = MixtureModel(2, [0.1, 0.9], [mc3, mc4])
m8.EM(seq6, 30, 0.3)
# print m8

# ----------------------------- Example 6 -----------------------------
], [ 3, 3, 5, 6, 4, 5, 4, 4, 4, 1, 3, 4, 5, 4, 1, 4, 4, 3, 1, 6, 6, 4, 4, 4, 4 ], [ 6, 4, 7, 4, 6, 4, 3, 4, 5, 1, 4, 6, 5, 4, 1, 5, 4, 4, 1, 5, 5, 4, 4, 4, 5 ]]) #compStructure(G,T) #raise RuntimeError DIAG = Alphabet(['.', '0', '8', '1']) noise1 = MultinomialDistribution(1, 4, [0.5, 0.15, 0.15, 0.2], alphabet=DIAG) noise2 = NormalDistribution(0, 1.0) noise3 = MultinomialDistribution(1, 4, [0.1, 0.4, 0.4, 0.1], alphabet=DIAG) noise4 = MultinomialDistribution(1, 4, [0.7, 0.1, 0.1, 0.1], alphabet=DIAG) noise5 = NormalDistribution(2.5, 0.5) noise6 = NormalDistribution(9.0, 1.0) noise7 = NormalDistribution(19.0, 1.0) noise8 = NormalDistribution(2.0, 1.0) nlist = [noise1, noise2, noise3, noise4, noise5, noise6, noise7] n1 = NormalDistribution(2.5, 0.5) n2 = NormalDistribution(3.2, 0.5) n3 = NormalDistribution(2.5, 0.5) n4 = NormalDistribution(-5.5, 0.5) n5 = NormalDistribution(-3.0, 0.5)