def testem(self):
    """Smoke-test EM on a data set with scalar features and HMM sequences.

    The data is generated from the fixtures created in setUp; a second
    mixture with different parameters is then fitted to it with EM.
    """
    # All sampling happens first, in a fixed order: the non-sequence
    # features, then one batch of sequences from each generating HMM.
    scalar_rows = self.gen.sampleSet(100)
    sequences = self.h1.hmm.sample(40, 10)
    extra_sequences = self.h2.hmm.sample(60, 10)
    sequences.merge(extra_sequences)

    data = mixtureHMM.SequenceDataSet()
    data.fromGHMM(scalar_rows, [sequences])
    data.internalInit(self.m)

    def build_hmm(transitions, emissions, initial):
        # Both start HMMs are defined over IntegerRange(0, 4).
        return mixtureHMM.getHMM(
            mixtureHMM.ghmm.IntegerRange(0, 4),
            mixtureHMM.ghmm.DiscreteDistribution(
                mixtureHMM.ghmm.IntegerRange(0, 4)),
            transitions, emissions, initial)

    # Start parameters of the model to be trained.
    start_hmm_a = build_hmm(
        [[0.5, 0.2, 0.3], [0.2, 0.3, 0.5], [0.1, 0.5, 0.4]],
        [[0.2, 0.4, 0.1, 0.3], [0.5, 0.1, 0.2, 0.2], [0.4, 0.3, 0.15, 0.15]],
        [0.3, 0.3, 0.4])
    start_hmm_b = build_hmm(
        [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]],
        [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.1, 0.6, 0.1]],
        [0.3, 0.4, 0.3])

    start_normal_a = NormalDistribution(-1.5, 1.5)
    start_normal_b = NormalDistribution(9.0, 1.2)
    start_multi_a = MultinomialDistribution(
        3, 4, [0.1, 0.1, 0.55, 0.25], alphabet=self.DIAG)
    start_multi_b = MultinomialDistribution(
        3, 4, [0.4, 0.3, 0.1, 0.2], alphabet=self.DIAG)

    component_a = ProductDistribution(
        [start_normal_a, start_multi_a, start_hmm_a])
    component_b = ProductDistribution(
        [start_normal_b, start_multi_b, start_hmm_b])

    trained = MixtureModel(2, [0.7, 0.3], [component_a, component_b])
    trained.EM(data, 80, 0.1, silent=1)
def setUp(self):
    """Build the generating models shared by the tests.

    Sets the following attributes:
      self.DIAG -- four-symbol Alphabet used by the multinomial features
      self.h1, self.h2 -- HMMs returned by mixtureHMM.getHMM
      self.m -- two-component mixture over (normal, multinomial, HMM)
      self.gen -- two-component mixture over (normal, multinomial) only,
                  used to sample the non-sequence part of a data set
    """
    self.DIAG = Alphabet(['.', '0', '8', '1'])

    # First generating HMM: transition matrix, emission matrix, start vector.
    A = [[0.3, 0.6, 0.1], [0.0, 0.5, 0.5], [0.4, 0.2, 0.4]]
    B = [[0.5, 0.2, 0.1, 0.2], [0.5, 0.4, 0.05, 0.05], [0.8, 0.1, 0.05, 0.05]]
    pi = [1.0, 0.0, 0.0]
    self.h1 = mixtureHMM.getHMM(
        mixtureHMM.ghmm.IntegerRange(0, 4),
        mixtureHMM.ghmm.DiscreteDistribution(
            mixtureHMM.ghmm.IntegerRange(0, 4)),
        A, B, pi)

    # Second generating HMM.
    A2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
    # The middle row used to read [0.1, 0.1, 0.4, 0.5], which sums to 1.1
    # and therefore is not a probability vector.  Every other row in these
    # fixtures sums to 1.0.
    B2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.2, 0.3, 0.3]]
    pi2 = [0.6, 0.4, 0.0]
    self.h2 = mixtureHMM.getHMM(
        mixtureHMM.ghmm.IntegerRange(0, 4),
        mixtureHMM.ghmm.DiscreteDistribution(
            mixtureHMM.ghmm.IntegerRange(0, 4)),
        A2, B2, pi2)

    n1 = NormalDistribution(2.5, 0.5)
    n2 = NormalDistribution(6.0, 0.8)

    mult1 = MultinomialDistribution(3, 4, [0.23, 0.26, 0.26, 0.25],
                                    alphabet=self.DIAG)
    mult2 = MultinomialDistribution(3, 4, [0.7, 0.1, 0.1, 0.1],
                                    alphabet=self.DIAG)

    c1 = ProductDistribution([n1, mult1, self.h1])
    c2 = ProductDistribution([n2, mult2, self.h2])

    mpi = [0.4, 0.6]
    self.m = MixtureModel(2, mpi, [c1, c2])

    # mixture for sampling: same scalar/discrete features, no HMM part
    gc1 = ProductDistribution([n1, mult1])
    gc2 = ProductDistribution([n2, mult2])
    self.gen = MixtureModel(2, mpi, [gc1, gc2])
def parseMix(fileHandle, mtype, G, pi, compFix, leaders=None, groups=None):
    """
    Parses a flat file for a mixture model. Internal function, is invoked from
    readMixture.

    Arguments:
      fileHandle -- handle passed on to parseFile for every component
                    (and for the prior of the Bayesian model types)
      mtype      -- model type: 'Mix', 'labelBayesMix' or 'BayesMix'
      G          -- number of components to read
      pi         -- mixture weights, handed to the model constructor
      compFix    -- per-component flags, handed to the model constructor
      leaders, groups -- optional CSI structure; when both are given the
                    structure is installed on the model and group members
                    are made to share the leader's distribution object

    Returns the constructed mixture model.

    Raises TypeError for an unknown mtype, and IOError if the CSI structure
    groups together distributions whose parsed parameters differ.
    """
    components = []
    while len(components) < G:
        components.append(parseFile(fileHandle))

    if mtype == 'Mix':
        from pymix.models.mixture import MixtureModel

        m = MixtureModel(G, pi, components, compFix=compFix)

    elif mtype == 'labelBayesMix':
        from pymix.models.labeled_bayes import labeledBayesMixtureModel

        prior = parseFile(fileHandle)
        if sum(compFix) > 0:  # XXX pass compFix if it is not trivial
            m = labeledBayesMixtureModel(G, pi, components, prior,
                                         compFix=compFix)
        else:
            m = labeledBayesMixtureModel(G, pi, components, prior)

    elif mtype == 'BayesMix':
        from pymix.models.bayes import BayesMixtureModel

        prior = parseFile(fileHandle)
        if sum(compFix) > 0:  # XXX pass compFix if it is not trivial
            m = BayesMixtureModel(G, pi, components, prior, compFix=compFix)
        else:
            m = BayesMixtureModel(G, pi, components, prior)

    else:
        raise TypeError('Unknown mixture type in parseMix: %r' % (mtype,))

    if leaders and groups:
        m.initStructure()
        m.leaders = leaders
        m.groups = groups

        for i in range(m.dist_nr):
            for lead in m.leaders[i]:
                for g in m.groups[i][lead]:
                    # Deliberately `not a == b` rather than `a != b`: only
                    # the equality comparison the original used is relied on.
                    if not m.components[lead][i] == m.components[g][i]:
                        # Call form of raise: valid on Python 2 and 3 alike.
                        raise IOError('Incompatible CSI structure and parameter values in parseMix.')
                    # Group members share the leader's distribution object.
                    m.components[g][i] = m.components[lead][i]
    return m
def setUp(self):
    """Build the generating models shared by the tests.

    Sets the following attributes:
      self.DIAG -- four-symbol Alphabet used by the multinomial features
      self.h1, self.h2 -- HMMs returned by mixtureHMM.getHMM
      self.m -- two-component mixture over (normal, multinomial, HMM)
      self.gen -- two-component mixture over (normal, multinomial) only,
                  used to sample the non-sequence part of a data set
    """
    self.DIAG = Alphabet(['.', '0', '8', '1'])

    # First generating HMM: transition matrix, emission matrix, start vector.
    A = [[0.3, 0.6, 0.1], [0.0, 0.5, 0.5], [0.4, 0.2, 0.4]]
    B = [[0.5, 0.2, 0.1, 0.2], [0.5, 0.4, 0.05, 0.05], [0.8, 0.1, 0.05, 0.05]]
    pi = [1.0, 0.0, 0.0]
    self.h1 = mixtureHMM.getHMM(
        mixtureHMM.ghmm.IntegerRange(0, 4),
        mixtureHMM.ghmm.DiscreteDistribution(
            mixtureHMM.ghmm.IntegerRange(0, 4)),
        A, B, pi)

    # Second generating HMM.
    A2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
    # The middle row used to read [0.1, 0.1, 0.4, 0.5], which sums to 1.1
    # and therefore is not a probability vector.  Every other row in these
    # fixtures sums to 1.0.
    B2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.2, 0.3, 0.3]]
    pi2 = [0.6, 0.4, 0.0]
    self.h2 = mixtureHMM.getHMM(
        mixtureHMM.ghmm.IntegerRange(0, 4),
        mixtureHMM.ghmm.DiscreteDistribution(
            mixtureHMM.ghmm.IntegerRange(0, 4)),
        A2, B2, pi2)

    n1 = NormalDistribution(2.5, 0.5)
    n2 = NormalDistribution(6.0, 0.8)

    mult1 = MultinomialDistribution(3, 4, [0.23, 0.26, 0.26, 0.25],
                                    alphabet=self.DIAG)
    mult2 = MultinomialDistribution(3, 4, [0.7, 0.1, 0.1, 0.1],
                                    alphabet=self.DIAG)

    c1 = ProductDistribution([n1, mult1, self.h1])
    c2 = ProductDistribution([n2, mult2, self.h2])

    mpi = [0.4, 0.6]
    self.m = MixtureModel(2, mpi, [c1, c2])

    # mixture for sampling: same scalar/discrete features, no HMM part
    gc1 = ProductDistribution([n1, mult1])
    gc2 = ProductDistribution([n2, mult2])
    self.gen = MixtureModel(2, mpi, [gc1, gc2])
def testememptylist(self):
    """Smoke-test EM on a data set that holds HMM sequences only.

    The first argument to fromGHMM is an empty list, so the data set has
    no non-sequence features.
    """
    # Sample from both generating HMMs and pool the sequences.
    sequences = self.h1.hmm.sample(40, 10)
    extra_sequences = self.h2.hmm.sample(60, 10)
    sequences.merge(extra_sequences)

    data = mixtureHMM.SequenceDataSet()
    data.fromGHMM([], [sequences])

    def build_hmm(transitions, emissions, initial):
        # Both start HMMs are defined over IntegerRange(0, 4).
        return mixtureHMM.getHMM(
            mixtureHMM.ghmm.IntegerRange(0, 4),
            mixtureHMM.ghmm.DiscreteDistribution(
                mixtureHMM.ghmm.IntegerRange(0, 4)),
            transitions, emissions, initial)

    # Start parameters of the model to be trained.
    start_hmm_a = build_hmm(
        [[0.5, 0.2, 0.3], [0.2, 0.3, 0.5], [0.1, 0.5, 0.4]],
        [[0.2, 0.4, 0.1, 0.3], [0.5, 0.1, 0.2, 0.2], [0.4, 0.3, 0.15, 0.15]],
        [0.3, 0.3, 0.4])
    start_hmm_b = build_hmm(
        [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]],
        [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.1, 0.6, 0.1]],
        [0.3, 0.4, 0.3])

    component_a = ProductDistribution([start_hmm_a])
    component_b = ProductDistribution([start_hmm_b])

    hmm_mixture = MixtureModel(2, [0.4, 0.6], [component_a, component_b])
    data.internalInit(hmm_mixture)
    hmm_mixture.EM(data, 40, 0.1, silent=1)
def testinternalinitcomplexempty(self):
    """Check internalInit on a data set that holds HMM sequences only.

    Initialising against self.m (which also has non-sequence features)
    must fail with AssertionError; initialising against an HMM-only
    mixture must succeed and leave a single complex feature.
    """
    # Sample from both generating HMMs and pool the sequences.
    sequences = self.h1.hmm.sample(40, 10)
    extra_sequences = self.h2.hmm.sample(60, 10)
    sequences.merge(extra_sequences)

    data = mixtureHMM.SequenceDataSet()
    data.fromGHMM([], [sequences])

    # Model and data disagree on the features -> AssertionError expected.
    self.assertRaises(AssertionError, data.internalInit, self.m)

    only_h1 = ProductDistribution([self.h1])
    only_h2 = ProductDistribution([self.h2])
    hmm_mixture = MixtureModel(2, [0.4, 0.6], [only_h1, only_h2])
    data.internalInit(hmm_mixture)

    self.assertEqual(str(data.complexFeature), '[1]')
    for observed in (data.p, data.suff_p):
        self.assertEqual(observed, 1)
def getRandomMixture(G, p, KL_lower, KL_upper, dtypes='discgauss', M=4, seed=None):
    """
    Draws a random mixture with G components over p features.

    For every feature, one distribution per component is drawn by rejection
    sampling: a candidate is kept only if its symmetric KL distance
    (sym_kl_dist) to every distribution already accepted for that feature
    lies within [KL_lower, KL_upper].

    Arguments:
      G        -- number of mixture components
      p        -- number of features per component
      KL_lower -- smallest accepted symmetric KL distance
      KL_upper -- largest accepted symmetric KL distance
      dtypes   -- 'disc' (all features discrete), 'gauss' (all Normal) or
                  'discgauss' (each feature chosen uniformly at random)
      M        -- passed to DiscreteDistribution and random_vector for
                  discrete features
      seed     -- currently ignored: the seeding code is disabled

    Returns the MixtureModel (built with struct=1) after updateFreeParams().

    Raises TypeError for an unknown dtypes value.

    NOTE: the rejection loops have no iteration limit; bounds that cannot
    be satisfied make this function loop forever.
    """
    min_sigma = 0.1  # minimal std for Normal
    max_sigma = 1.0  # maximal std for Normal
    min_mu = -5.0  # minimal mean
    max_mu = 8.0  # maximal mean

    if dtypes == 'disc':
        featureTypes = [0] * p
    elif dtypes == 'gauss':
        featureTypes = [1] * p
    elif dtypes == 'discgauss':
        # discrete or Normal features for now, chosen uniformly
        # 0 discrete, 1 Normal
        featureTypes = [random.choice((0, 1)) for i in range(p)]
    else:
        raise TypeError('Unknown dtypes value in getRandomMixture: %r' % (dtypes,))

    C = []
    for j in range(p):
        c_j = []
        for i in range(G):
            if featureTypes[j] == 0:
                accepted = False
                while not accepted:
                    cand = DiscreteDistribution(M, random_vector(M))
                    accepted = True
                    for d in c_j:
                        KL_dist = sym_kl_dist(d, cand)
                        if KL_dist > KL_upper or KL_dist < KL_lower:
                            accepted = False
                            break
                c_j.append(cand)
            elif featureTypes[j] == 1:
                accepted = False
                while not accepted:
                    mu = random.uniform(min_mu, max_mu)
                    sigma = random.uniform(min_sigma, max_sigma)
                    cand = NormalDistribution(mu, sigma)
                    accepted = True
                    for d in c_j:
                        KL_dist = sym_kl_dist(d, cand)
                        if KL_dist > KL_upper or KL_dist < KL_lower:
                            accepted = False
                            # One violation already rejects the candidate;
                            # stop comparing, as the discrete branch does.
                            break
                c_j.append(cand)
            else:
                # Was a bare `RuntimeError` expression, which silently did
                # nothing; an unknown feature type is an internal error.
                raise RuntimeError('Unknown feature type in getRandomMixture: %r'
                                   % (featureTypes[j],))
        C.append(c_j)

    # C is indexed [feature][component]; components need [component][feature].
    comps = []
    for i in range(G):
        comps.append(ProductDistribution([C[j][i] for j in range(p)]))

    pi = get_random_pi(G, 0.1)

    m = MixtureModel(G, pi, comps, struct=1)
    m.updateFreeParams()

    return m
# NOTE(review): these three appends presumably run inside a loop whose
# header lies above this excerpt -- confirm and restore their indentation.
p2.append(random.random())
p3.append(random.random())
p4.append(random.random())

# Scale each parameter vector so that its entries sum to one.
g1 = lambda x: x / sum(p1)
p1 = map(g1, p1)

g2 = lambda x: x / sum(p2)
p2 = map(g2, p2)

g3 = lambda x: x / sum(p3)
p3 = map(g3, p3)

g4 = lambda x: x / sum(p4)
p4 = map(g4, p4)

# First component: two multinomial and two normal features.
mult = MultinomialDistribution(6, 25, p1, SNP)
mult2 = MultinomialDistribution(7, 25, p2, SNP)
phi = NormalDistribution(11.0, 4.0)
phi2 = NormalDistribution(11.0, 6.0)
pd1 = ProductDistribution([mult, mult2, phi, phi2])

# Second component.
mult3 = MultinomialDistribution(6, 25, p3, SNP)
mult4 = MultinomialDistribution(7, 25, p4, SNP)
phi3 = NormalDistribution(8.0, 5.0)
phi4 = NormalDistribution(15.0, 5.0)
# This used to be built from [mult, mult2, phi, phi2], i.e. the very same
# objects as pd1, leaving mult3/mult4/phi3/phi4 unused and making both
# mixture components share their distributions.
pd2 = ProductDistribution([mult3, mult4, phi3, phi4])

m = MixtureModel(2, [0.5, 0.5], [pd1, pd2])

m.EM(d, 15, 0.05)
# Build three product distributions, each made of ten multinomial features
# whose six parameters are drawn at random and scaled to sum to one.
# pdList itself is created above this excerpt.
# NOTE(review): the lambda reads `par` when map() calls it.  If map() were
# lazy, `par` would already be rebound to the map object by then -- confirm
# this only runs where map() returns a list.
for j in range(3):
    dList = []
    for i in range(10):
        par = [random(), random(), random(), random(), random(), random()]
        f = lambda x: x / sum(par)
        par = map(f, par)
        dList.append(MultinomialDistribution(6, 6, par))
    pdList.append(ProductDistribution(dList))

# Random mixture weights, scaled to sum to one.
piList = [random(), random(), random()]
g = lambda x: x / sum(piList)
piList = map(g, piList)

mix = MixtureModel(3, piList, pdList)

dat = mix.sampleDataSet(1000)

# Second, independently drawn set of three product distributions with the
# same layout as above.
pdList2 = []
for j in range(3):
    dList2 = []
    for i in range(10):
        par2 = [random(), random(), random(), random(), random(), random()]
        f = lambda x: x / sum(par2)
        par2 = map(f, par2)
        dList2.append(MultinomialDistribution(6, 6, par2))
    pdList2.append(ProductDistribution(dList2))
# Two-component mixture whose components combine a normal, a multinomial
# and an HMM feature (n1, n2, mult1, mult2, h1, h2 are defined above this
# excerpt).
c1 = ProductDistribution([n1, mult1, h1])
c2 = ProductDistribution([n2, mult2, h2])

mpi = [0.4, 0.6]
m = MixtureModel(2, mpi, [c1, c2])

#print m
#print "-->",m.components[0].suff_dataRange

# ----------- constructing complex DataSet ----------------

# mixture for sampling: same weights and scalar/discrete features as `m`,
# but without the HMM feature
gc1 = ProductDistribution([n1, mult1])
gc2 = ProductDistribution([n2, mult2])
gen = MixtureModel(2, mpi, [gc1, gc2])

dat = gen.sampleSet(100)
#print dat

# sampling hmm data: one batch from each HMM, merged into seq1
seq1 = h1.hmm.sample(40, 10)
seq2 = h2.hmm.sample(60, 10)
seq1.merge(seq2)

# Combine the sampled features and the merged sequences in one data set.
data = mixtureHMM.SequenceDataSet()

#data.fromGHMM(dat,[seq1])
data.fromGHMM(dat, [seq1])
from pymix.distributions.normal import NormalDistribution
from pymix.distributions.product import ProductDistribution
from pymix.models.mixture import MixtureModel
from pymix.util.dataset import DataSet

# Two components, each a product of three normal distributions.
pr1 = ProductDistribution([NormalDistribution(-6.0, 0.5), NormalDistribution(-4.0, 0.5), NormalDistribution(-3.0, 0.5)])

pr2 = ProductDistribution([NormalDistribution(-5.0, 0.5), NormalDistribution(-3.3, 0.5), NormalDistribution(-2.3, 0.5)])

m = MixtureModel(2, [0.7, 0.3], [pr1, pr2])

# Draw five samples from the mixture.
seq = m.sampleSet(5)
#print seq

z = 0
# NOTE(review): this passes the return value of DataSet().fromList(seq),
# not the DataSet instance itself -- confirm fromList returns the data set.
m.printTraceback(DataSet().fromList(seq), z)
# Second HMM; A2, B2 and pi2 are defined above this excerpt.
h2 = mixtureHMM.getHMM(
    mixtureHMM.ghmm.IntegerRange(0, 4),
    mixtureHMM.ghmm.DiscreteDistribution(mixtureHMM.ghmm.IntegerRange(0, 4)),
    A2, B2, pi2)

n1 = NormalDistribution(2.5, 0.5)
n2 = NormalDistribution(6.0, 0.8)

mult1 = MultinomialDistribution(3, 4, [0.23, 0.26, 0.26, 0.25], alphabet=DIAG)
mult2 = MultinomialDistribution(3, 4, [0.7, 0.1, 0.1, 0.1], alphabet=DIAG)

# Two-component mixture whose components combine a normal, a multinomial
# and an HMM feature (h1 is defined above this excerpt).
c1 = ProductDistribution([n1, mult1, h1])
c2 = ProductDistribution([n2, mult2, h2])

mpi = [0.4, 0.6]
m = MixtureModel(2, mpi, [c1, c2])

#print m
#print "-->",m.components[0].suff_dataRange

# ----------- constructing complex DataSet ----------------

# mixture for sampling: same weights and scalar/discrete features as `m`,
# but without the HMM feature
gc1 = ProductDistribution([n1, mult1])
gc2 = ProductDistribution([n2, mult2])
gen = MixtureModel(2, mpi, [gc1, gc2])

dat = gen.sampleSet(100)
#print dat

# sampling hmm data
# Tail of the fifth component (n21..n25, d9 and nlist are defined above
# this excerpt).
d10 = MultinomialDistribution(1, 4, [0.7, 0.1, 0.1, 0.1], alphabet=DIAG)
mark5 = NormalDistribution(80, 0.1)

pd5 = ProductDistribution([n21, n22, n23, n24, n25, d9, d10, mark5] + nlist)

# Sixth component: five normal features, two multinomial features and a
# `mark` normal feature, followed by the shared nlist features.
n26 = NormalDistribution(4.0, 1.0)
n27 = NormalDistribution(2.50, 0.490)
n28 = NormalDistribution(2.52, 0.495)
n29 = NormalDistribution(5.52, 0.495)
n30 = NormalDistribution(-4.95, 0.5)
d11 = MultinomialDistribution(1, 4, [0.21, 0.27, 0.27, 0.25], alphabet=DIAG)
d12 = MultinomialDistribution(1, 4, [0.7, 0.1, 0.1, 0.1], alphabet=DIAG)
mark6 = NormalDistribution(100, 0.1)

pd6 = ProductDistribution([n26, n27, n28, n29, n30, d11, d12, mark6] + nlist)

# Six-component mixture built with struct=1 (pd..pd4 are defined above this
# excerpt); 500 samples are drawn from it and then used to update its
# structure.
mix = MixtureModel(6, [0.1, 0.1, 0.1, 0.2, 0.2, 0.3], [pd, pd2, pd3, pd4, pd5, pd6], struct=1)

data = mix.sampleDataSet(500)

#print mix

mix.updateStructureGlobal(data)

#print mix
#print mix.groups
#print mix.leaders

#writeMixture(mix, "test.mix")
#mix.evalStructure(data.headers)
# Scale each parameter vector so that its entries sum to one (p, p2, p5 and
# N are defined above this excerpt).
# NOTE(review): each lambda reads its list when map() calls it; this relies
# on map() returning a list before the name is rebound -- confirm.
g = lambda x: x/sum(p)
p = map(g,p)

g2 = lambda x: x/sum(p2)
p2 = map(g2,p2)

g5 = lambda x: x/sum(p5)
p5 = map(g5,p5)

multi = MultinomialDistribution(80,N,p)
multi2 = MultinomialDistribution(80,N,p2)
multi5 = MultinomialDistribution(80,N,p5)

# Three-component mixture of the multinomials above.
mix = MixtureModel(3,[0.5,0.25,0.25],[multi,multi2,multi5])
print mix

# labelled_sample returns a pair, unpacked here into `true` and `s`
# (presumably the true component labels and the samples -- confirm).
[true, s] = mix.labelled_sample(1000)

# Random parameter vectors of length N for a second set of multinomials.
p3 = []
p4 = []
p6 = []
for i in range(N):
    p3.append(random())
    p4.append(random())
    p6.append(random())

g3 = lambda x: x/sum(p3)
p3 = map(g3,p3)
# # m2 = MixtureModel(3, [0.2, 0.4, 0.4], # [NormalDistribution(-3.5, 0.5), # NormalDistribution(0.5, 1.5), # NormalDistribution(4.0, 0.6) # ]) # # m2.randParams(seq) # t1 = clock() # m2.EM(seq, 40, 0.0) # t2 = clock() # print "time = ", t2 - t1 # print m2 # ----------------------------- Example 2 ----------------------------- e1 = MixtureModel(2, [0.7, 0.3], [NormalDistribution(0.0, 0.4), ExponentialDistribution(0.5)]) seq2 = e1.sample(500) e2 = MixtureModel(2, [0.5, 0.5], [NormalDistribution(2.0, 0.4), ExponentialDistribution(0.1)]) # e2.EM(seq2,60,5) # ----------------------------- Example 3 ----------------------------- m3 = MixtureModel(2, [0.3, 0.7], [NormalDistribution(0.0, 0.5), NormalDistribution(1.3, 0.5)]) (true, seq3) = m3.sampleSetLabels(380) m4 = MixtureModel(2, [0.5, 0.5], [NormalDistribution(-1.5, 1.5), NormalDistribution(1.5, 1.5)])
random() ] g = lambda x: x / sum(piList) piList = map(g, piList) #print piList #print sum(piList) for i in range(10): par = [random(), random(), random()] f = lambda x: x / sum(par) par = map(f, par) # print par dList.append(MultinomialDistribution(6, 3, par)) mix = MixtureModel(1000, piList, dList) s = mix.sample() dList2 = [] piList2 = [ random(), random(), random(), random(), random(), random(), random(), random(), random(), random()
# m2 = MixtureModel(3, [0.2, 0.4, 0.4], # [NormalDistribution(-3.5, 0.5), # NormalDistribution(0.5, 1.5), # NormalDistribution(4.0, 0.6) # ]) # # m2.randParams(seq) # t1 = clock() # m2.EM(seq, 40, 0.0) # t2 = clock() # print "time = ", t2 - t1 # print m2 # ----------------------------- Example 2 ----------------------------- e1 = MixtureModel(2, [0.7, 0.3], [NormalDistribution(0.0, 0.4), ExponentialDistribution(0.5)]) seq2 = e1.sample(500) e2 = MixtureModel(2, [0.5, 0.5], [NormalDistribution(2.0, 0.4), ExponentialDistribution(0.1)]) #e2.EM(seq2,60,5) # ----------------------------- Example 3 ----------------------------- m3 = MixtureModel(2, [0.3, 0.7], [NormalDistribution(0.0, 0.5), NormalDistribution(1.3, 0.5)])
class HMMTests(FuzzyTestCase):
    """Tests for SequenceDataSet and for mixtures that contain HMM features."""

    def _buildHMM(self, A, B, pi):
        # Shared construction of the HMMs used in this class: every one of
        # them is defined over IntegerRange(0, 4).
        return mixtureHMM.getHMM(
            mixtureHMM.ghmm.IntegerRange(0, 4),
            mixtureHMM.ghmm.DiscreteDistribution(
                mixtureHMM.ghmm.IntegerRange(0, 4)),
            A, B, pi)

    def setUp(self):
        """Build the generating models shared by the tests.

        Sets self.DIAG (alphabet), self.h1 / self.h2 (generating HMMs),
        self.m (mixture over normal, multinomial and HMM features) and
        self.gen (mixture over the normal and multinomial features only).
        """
        self.DIAG = Alphabet(['.', '0', '8', '1'])

        A = [[0.3, 0.6, 0.1], [0.0, 0.5, 0.5], [0.4, 0.2, 0.4]]
        B = [[0.5, 0.2, 0.1, 0.2], [0.5, 0.4, 0.05, 0.05], [0.8, 0.1, 0.05, 0.05]]
        pi = [1.0, 0.0, 0.0]
        self.h1 = self._buildHMM(A, B, pi)

        A2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        # The middle row used to read [0.1, 0.1, 0.4, 0.5], which sums to
        # 1.1 and therefore is not a probability vector; the corresponding
        # start-value row in testem is [0.1, 0.1, 0.4, 0.4].
        B2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.2, 0.3, 0.3]]
        pi2 = [0.6, 0.4, 0.0]
        self.h2 = self._buildHMM(A2, B2, pi2)

        n1 = NormalDistribution(2.5, 0.5)
        n2 = NormalDistribution(6.0, 0.8)

        mult1 = MultinomialDistribution(3, 4, [0.23, 0.26, 0.26, 0.25], alphabet=self.DIAG)
        mult2 = MultinomialDistribution(3, 4, [0.7, 0.1, 0.1, 0.1], alphabet=self.DIAG)

        c1 = ProductDistribution([n1, mult1, self.h1])
        c2 = ProductDistribution([n2, mult2, self.h2])

        mpi = [0.4, 0.6]
        self.m = MixtureModel(2, mpi, [c1, c2])

        # mixture for sampling
        gc1 = ProductDistribution([n1, mult1])
        gc2 = ProductDistribution([n2, mult2])
        self.gen = MixtureModel(2, mpi, [gc1, gc2])

    def testinternalinitcomplex(self):
        """internalInit on a data set with scalar features and HMM sequences."""
        # complex DataSet with HMM sequences
        dat = self.gen.sampleSet(100)

        # sampling hmm data
        seq1 = self.h1.hmm.sample(40, 10)
        seq2 = self.h2.hmm.sample(60, 10)
        seq1.merge(seq2)

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM(dat, [seq1])
        data.internalInit(self.m)

        self.assertEqual(str(data.complexFeature), '[0, 0, 1]')
        self.assertEqual(data.p, 5)
        self.assertEqual(data.suff_p, 6)

    def testinternalinitcomplexempty(self):
        """internalInit on a data set that holds HMM sequences only."""
        # sampling hmm data
        seq1 = self.h1.hmm.sample(40, 10)
        seq2 = self.h2.hmm.sample(60, 10)
        seq1.merge(seq2)

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM([], [seq1])

        # self.m also has non-sequence features, which this data set lacks.
        self.assertRaises(AssertionError, data.internalInit, self.m)

        c1 = ProductDistribution([self.h1])
        c2 = ProductDistribution([self.h2])

        mpi = [0.4, 0.6]
        hm = MixtureModel(2, mpi, [c1, c2])

        data.internalInit(hm)

        self.assertEqual(str(data.complexFeature), '[1]')
        self.assertEqual(data.p, 1)
        self.assertEqual(data.suff_p, 1)

    def testgetinternalfeature(self):
        """getInternalFeature returns arrays for scalar features and a SequenceSet for the HMM feature."""
        # complex DataSet with HMM sequences
        dat = self.gen.sampleSet(100)

        # sampling hmm data
        seq1 = self.h1.hmm.sample(40, 10)
        seq2 = self.h2.hmm.sample(60, 10)
        seq1.merge(seq2)

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM(dat, [seq1])
        data.internalInit(self.m)

        f0 = data.getInternalFeature(0)
        self.assertEqual(isinstance(f0, numarray.numarraycore.NumArray), True)

        f1 = data.getInternalFeature(1)
        self.assertEqual(isinstance(f1, numarray.numarraycore.NumArray), True)

        f2 = data.getInternalFeature(2)
        self.assertEqual(isinstance(f2, ghmm.SequenceSet), True)

    def testem(self):
        """Smoke-test EM on a data set with scalar features and HMM sequences."""
        # complex DataSet with HMM sequences and scalar data
        dat = self.gen.sampleSet(100)

        # sampling hmm data
        seq1 = self.h1.hmm.sample(40, 10)
        seq2 = self.h2.hmm.sample(60, 10)
        seq1.merge(seq2)

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM(dat, [seq1])
        data.internalInit(self.m)

        # Start parameters of the model to be trained.
        tA = [[0.5, 0.2, 0.3], [0.2, 0.3, 0.5], [0.1, 0.5, 0.4]]
        tB = [[0.2, 0.4, 0.1, 0.3], [0.5, 0.1, 0.2, 0.2], [0.4, 0.3, 0.15, 0.15]]
        tpi = [0.3, 0.3, 0.4]
        th1 = self._buildHMM(tA, tB, tpi)

        tA2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        tB2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.1, 0.6, 0.1]]
        tpi2 = [0.3, 0.4, 0.3]
        th2 = self._buildHMM(tA2, tB2, tpi2)

        tn1 = NormalDistribution(-1.5, 1.5)
        tn2 = NormalDistribution(9.0, 1.2)

        tmult1 = MultinomialDistribution(3, 4, [0.1, 0.1, 0.55, 0.25], alphabet=self.DIAG)
        tmult2 = MultinomialDistribution(3, 4, [0.4, 0.3, 0.1, 0.2], alphabet=self.DIAG)

        tc1 = ProductDistribution([tn1, tmult1, th1])
        tc2 = ProductDistribution([tn2, tmult2, th2])

        tmpi = [0.7, 0.3]
        tm = MixtureModel(2, tmpi, [tc1, tc2])

        tm.EM(data, 80, 0.1, silent=1)

    def testememptylist(self):
        """Smoke-test EM on a data set that holds HMM sequences only."""
        # sampling hmm data
        seq1 = self.h1.hmm.sample(40, 10)
        seq2 = self.h2.hmm.sample(60, 10)
        seq1.merge(seq2)

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM([], [seq1])

        tA = [[0.5, 0.2, 0.3], [0.2, 0.3, 0.5], [0.1, 0.5, 0.4]]
        tB = [[0.2, 0.4, 0.1, 0.3], [0.5, 0.1, 0.2, 0.2], [0.4, 0.3, 0.15, 0.15]]
        tpi = [0.3, 0.3, 0.4]
        th1 = self._buildHMM(tA, tB, tpi)

        tA2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        tB2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.1, 0.6, 0.1]]
        tpi2 = [0.3, 0.4, 0.3]
        th2 = self._buildHMM(tA2, tB2, tpi2)

        c1 = ProductDistribution([th1])
        c2 = ProductDistribution([th2])

        mpi = [0.4, 0.6]
        hm = MixtureModel(2, mpi, [c1, c2])

        data.internalInit(hm)

        hm.EM(data, 40, 0.1, silent=1)

    def testsimpleem(self):
        """Smoke-test EM with each HMM wrapped directly in a ProductDistribution."""
        # sampling hmm data
        seq1 = self.h1.hmm.sample(40, 10)
        seq2 = self.h2.hmm.sample(60, 10)
        seq1.merge(seq2)

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM([], [seq1])

        tA = [[0.5, 0.2, 0.3], [0.2, 0.3, 0.5], [0.1, 0.5, 0.4]]
        tB = [[0.2, 0.4, 0.1, 0.3], [0.5, 0.1, 0.2, 0.2], [0.4, 0.3, 0.15, 0.15]]
        tpi = [0.3, 0.3, 0.4]
        th1 = ProductDistribution([self._buildHMM(tA, tB, tpi)])

        tA2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        tB2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.1, 0.6, 0.1]]
        tpi2 = [0.3, 0.4, 0.3]
        th2 = ProductDistribution([self._buildHMM(tA2, tB2, tpi2)])

        mpi = [0.4, 0.6]
        hm = MixtureModel(2, mpi, [th1, th2])

        data.internalInit(hm)

        hm.EM(data, 80, 0.1, silent=1)
par = [random(), random(), random(),random(), random(), random()] f = lambda x: x / sum(par) par = map(f,par) dList.append( MultinomialDistribution(6,6,par)) pdList.append( ProductDistribution(dList)) piList = [random(), random(), random()] g = lambda x: x / sum(piList) piList = map(g,piList) mix = MixtureModel(3,piList,pdList) dat = mix.sampleDataSet(1000) pdList2= [] for j in range(3): dList2 = [] for i in range(10): par2 = [random(), random(), random(),random(), random(), random()] f = lambda x: x / sum(par2) par2 = map(f,par2) dList2.append( MultinomialDistribution(6,6,par2)) pdList2.append( ProductDistribution(dList2))
items.reverse() new_pi = np.array(mixx.pi.tolist() + [0.01])[::-1] new_pi = new_pi / np.sum(new_pi) #items = items + [MultiNormalDistribution(4, means, sigma)] items = items + [MultivariateTDistribution(DIMS, means, sigma, 5)] # Fix parameters of all components but the new one: #comp_fix = [1] * (len(new_pi) - 1) + [0] return MixtureModel(len(new_pi), new_pi, items) #import ipdb; ipdb.set_trace() st = MultivariateTDistribution(DIMS, xy.mean(axis=1)*1.1, np.diag(xy.var(axis=1)), 3) da = xy.T #[:500] ds = DataSet() ds.fromArray(da) m = MixtureModel(1, [1], [st]) print m m.EM(ds, 60, 0.1) print m #import ipdb; ipdb.set_trace() #m2 = mix.MixtureModel(2, [0.8, 0.2], [m.components[0].distList[0], d2], compFix=[0, 0]) for _ in xrange(6): m = mixturate(m, xy.mean(axis=1)-10., np.diag(xy.var(axis=1))) m.randMaxEM(ds, 3, 30, 0.1) print m import joblib joblib.dump(m, 'test2.mix', compress=3) pl.plotData(da[:, :2]) col = 'rgbcmyk' icol = 0
from pymix.distributions.normal import NormalDistribution
from pymix.distributions.product import ProductDistribution
from pymix.models.mixture import MixtureModel
from pymix.util.dataset import DataSet

# Two components, each a product of three normal distributions.
pr1 = ProductDistribution([
    NormalDistribution(-6.0, 0.5),
    NormalDistribution(-4.0, 0.5),
    NormalDistribution(-3.0, 0.5)
])

pr2 = ProductDistribution([
    NormalDistribution(-5.0, 0.5),
    NormalDistribution(-3.3, 0.5),
    NormalDistribution(-2.3, 0.5)
])

m = MixtureModel(2, [0.7, 0.3], [pr1, pr2])

# Draw five samples from the mixture.
seq = m.sampleSet(5)
#print seq

z = 0
# NOTE(review): this passes the return value of DataSet().fromList(seq),
# not the DataSet instance itself -- confirm fromList returns the data set.
m.printTraceback(DataSet().fromList(seq), z)
class HMMTests(FuzzyTestCase):
    """Tests for SequenceDataSet and for mixtures that contain HMM features."""

    def _buildHMM(self, A, B, pi):
        # Shared construction of the HMMs used in this class: every one of
        # them is defined over IntegerRange(0, 4).
        return mixtureHMM.getHMM(
            mixtureHMM.ghmm.IntegerRange(0, 4),
            mixtureHMM.ghmm.DiscreteDistribution(
                mixtureHMM.ghmm.IntegerRange(0, 4)),
            A, B, pi)

    def setUp(self):
        """Build the generating models shared by the tests.

        Sets self.DIAG (alphabet), self.h1 / self.h2 (generating HMMs),
        self.m (mixture over normal, multinomial and HMM features) and
        self.gen (mixture over the normal and multinomial features only).
        """
        self.DIAG = Alphabet(['.', '0', '8', '1'])

        A = [[0.3, 0.6, 0.1], [0.0, 0.5, 0.5], [0.4, 0.2, 0.4]]
        B = [[0.5, 0.2, 0.1, 0.2], [0.5, 0.4, 0.05, 0.05], [0.8, 0.1, 0.05, 0.05]]
        pi = [1.0, 0.0, 0.0]
        self.h1 = self._buildHMM(A, B, pi)

        A2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        # The middle row used to read [0.1, 0.1, 0.4, 0.5], which sums to
        # 1.1 and therefore is not a probability vector; the corresponding
        # start-value row in testem is [0.1, 0.1, 0.4, 0.4].
        B2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.2, 0.3, 0.3]]
        pi2 = [0.6, 0.4, 0.0]
        self.h2 = self._buildHMM(A2, B2, pi2)

        n1 = NormalDistribution(2.5, 0.5)
        n2 = NormalDistribution(6.0, 0.8)

        mult1 = MultinomialDistribution(3, 4, [0.23, 0.26, 0.26, 0.25],
                                        alphabet=self.DIAG)
        mult2 = MultinomialDistribution(3, 4, [0.7, 0.1, 0.1, 0.1],
                                        alphabet=self.DIAG)

        c1 = ProductDistribution([n1, mult1, self.h1])
        c2 = ProductDistribution([n2, mult2, self.h2])

        mpi = [0.4, 0.6]
        self.m = MixtureModel(2, mpi, [c1, c2])

        # mixture for sampling
        gc1 = ProductDistribution([n1, mult1])
        gc2 = ProductDistribution([n2, mult2])
        self.gen = MixtureModel(2, mpi, [gc1, gc2])

    def testinternalinitcomplex(self):
        """internalInit on a data set with scalar features and HMM sequences."""
        # complex DataSet with HMM sequences
        dat = self.gen.sampleSet(100)

        # sampling hmm data
        seq1 = self.h1.hmm.sample(40, 10)
        seq2 = self.h2.hmm.sample(60, 10)
        seq1.merge(seq2)

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM(dat, [seq1])
        data.internalInit(self.m)

        self.assertEqual(str(data.complexFeature), '[0, 0, 1]')
        self.assertEqual(data.p, 5)
        self.assertEqual(data.suff_p, 6)

    def testinternalinitcomplexempty(self):
        """internalInit on a data set that holds HMM sequences only."""
        # sampling hmm data
        seq1 = self.h1.hmm.sample(40, 10)
        seq2 = self.h2.hmm.sample(60, 10)
        seq1.merge(seq2)

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM([], [seq1])

        # self.m also has non-sequence features, which this data set lacks.
        self.assertRaises(AssertionError, data.internalInit, self.m)

        c1 = ProductDistribution([self.h1])
        c2 = ProductDistribution([self.h2])

        mpi = [0.4, 0.6]
        hm = MixtureModel(2, mpi, [c1, c2])

        data.internalInit(hm)

        self.assertEqual(str(data.complexFeature), '[1]')
        self.assertEqual(data.p, 1)
        self.assertEqual(data.suff_p, 1)

    def testgetinternalfeature(self):
        """getInternalFeature returns arrays for scalar features and a SequenceSet for the HMM feature."""
        # complex DataSet with HMM sequences
        dat = self.gen.sampleSet(100)

        # sampling hmm data
        seq1 = self.h1.hmm.sample(40, 10)
        seq2 = self.h2.hmm.sample(60, 10)
        seq1.merge(seq2)

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM(dat, [seq1])
        data.internalInit(self.m)

        f0 = data.getInternalFeature(0)
        self.assertEqual(isinstance(f0, numarray.numarraycore.NumArray), True)

        f1 = data.getInternalFeature(1)
        self.assertEqual(isinstance(f1, numarray.numarraycore.NumArray), True)

        f2 = data.getInternalFeature(2)
        self.assertEqual(isinstance(f2, ghmm.SequenceSet), True)

    def testem(self):
        """Smoke-test EM on a data set with scalar features and HMM sequences."""
        # complex DataSet with HMM sequences and scalar data
        dat = self.gen.sampleSet(100)

        # sampling hmm data
        seq1 = self.h1.hmm.sample(40, 10)
        seq2 = self.h2.hmm.sample(60, 10)
        seq1.merge(seq2)

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM(dat, [seq1])
        data.internalInit(self.m)

        # Start parameters of the model to be trained.
        tA = [[0.5, 0.2, 0.3], [0.2, 0.3, 0.5], [0.1, 0.5, 0.4]]
        tB = [[0.2, 0.4, 0.1, 0.3], [0.5, 0.1, 0.2, 0.2], [0.4, 0.3, 0.15, 0.15]]
        tpi = [0.3, 0.3, 0.4]
        th1 = self._buildHMM(tA, tB, tpi)

        tA2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        tB2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.1, 0.6, 0.1]]
        tpi2 = [0.3, 0.4, 0.3]
        th2 = self._buildHMM(tA2, tB2, tpi2)

        tn1 = NormalDistribution(-1.5, 1.5)
        tn2 = NormalDistribution(9.0, 1.2)

        tmult1 = MultinomialDistribution(3, 4, [0.1, 0.1, 0.55, 0.25],
                                         alphabet=self.DIAG)
        tmult2 = MultinomialDistribution(3, 4, [0.4, 0.3, 0.1, 0.2],
                                         alphabet=self.DIAG)

        tc1 = ProductDistribution([tn1, tmult1, th1])
        tc2 = ProductDistribution([tn2, tmult2, th2])

        tmpi = [0.7, 0.3]
        tm = MixtureModel(2, tmpi, [tc1, tc2])

        tm.EM(data, 80, 0.1, silent=1)

    def testememptylist(self):
        """Smoke-test EM on a data set that holds HMM sequences only."""
        # sampling hmm data
        seq1 = self.h1.hmm.sample(40, 10)
        seq2 = self.h2.hmm.sample(60, 10)
        seq1.merge(seq2)

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM([], [seq1])

        tA = [[0.5, 0.2, 0.3], [0.2, 0.3, 0.5], [0.1, 0.5, 0.4]]
        tB = [[0.2, 0.4, 0.1, 0.3], [0.5, 0.1, 0.2, 0.2], [0.4, 0.3, 0.15, 0.15]]
        tpi = [0.3, 0.3, 0.4]
        th1 = self._buildHMM(tA, tB, tpi)

        tA2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        tB2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.1, 0.6, 0.1]]
        tpi2 = [0.3, 0.4, 0.3]
        th2 = self._buildHMM(tA2, tB2, tpi2)

        c1 = ProductDistribution([th1])
        c2 = ProductDistribution([th2])

        mpi = [0.4, 0.6]
        hm = MixtureModel(2, mpi, [c1, c2])

        data.internalInit(hm)

        hm.EM(data, 40, 0.1, silent=1)

    def testsimpleem(self):
        """Smoke-test EM with each HMM wrapped directly in a ProductDistribution."""
        # sampling hmm data
        seq1 = self.h1.hmm.sample(40, 10)
        seq2 = self.h2.hmm.sample(60, 10)
        seq1.merge(seq2)

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM([], [seq1])

        tA = [[0.5, 0.2, 0.3], [0.2, 0.3, 0.5], [0.1, 0.5, 0.4]]
        tB = [[0.2, 0.4, 0.1, 0.3], [0.5, 0.1, 0.2, 0.2], [0.4, 0.3, 0.15, 0.15]]
        tpi = [0.3, 0.3, 0.4]
        th1 = ProductDistribution([self._buildHMM(tA, tB, tpi)])

        tA2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        tB2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.1, 0.6, 0.1]]
        tpi2 = [0.3, 0.4, 0.3]
        th2 = ProductDistribution([self._buildHMM(tA2, tB2, tpi2)])

        mpi = [0.4, 0.6]
        hm = MixtureModel(2, mpi, [th1, th2])

        data.internalInit(hm)

        hm.EM(data, 80, 0.1, silent=1)
def getRandomMixture(G, p, KL_lower, KL_upper, dtypes='discgauss', M=4,seed = None):
    """
    Draws a random mixture with G components over p features.

    For every feature, one distribution per component is drawn by rejection
    sampling: a candidate is kept only if its symmetric KL distance
    (sym_kl_dist) to every distribution already accepted for that feature
    lies within [KL_lower, KL_upper].

    Arguments:
      G        -- number of mixture components
      p        -- number of features per component
      KL_lower -- smallest accepted symmetric KL distance
      KL_upper -- largest accepted symmetric KL distance
      dtypes   -- 'disc' (all features discrete), 'gauss' (all Normal) or
                  'discgauss' (each feature chosen uniformly at random)
      M        -- passed to DiscreteDistribution and random_vector for
                  discrete features
      seed     -- currently ignored: the seeding code is disabled

    Returns the MixtureModel (built with struct=1) after updateFreeParams().

    Raises TypeError for an unknown dtypes value.

    NOTE: the rejection loops have no iteration limit; bounds that cannot
    be satisfied make this function loop forever.
    """
    min_sigma = 0.1    # minimal std for Normal
    max_sigma = 1.0   # maximal std for Normal
    min_mu = -5.0      # minimal mean
    max_mu = 8.0       # maximal mean

    if dtypes == 'disc':
        featureTypes = [0] * p
    elif dtypes == 'gauss':
        featureTypes = [1] * p
    elif dtypes == 'discgauss':
        # discrete or Normal features for now, chosen uniformly
        # 0 discrete, 1 Normal
        featureTypes = [random.choice((0, 1)) for i in range(p)]
    else:
        raise TypeError('Unknown dtypes value in getRandomMixture: %r' % (dtypes,))

    C = []
    for j in range(p):
        c_j = []
        for i in range(G):
            if featureTypes[j] == 0:
                accepted = False
                while not accepted:
                    cand = DiscreteDistribution(M, random_vector(M))
                    accepted = True
                    for d in c_j:
                        KL_dist = sym_kl_dist(d, cand)
                        if KL_dist > KL_upper or KL_dist < KL_lower:
                            accepted = False
                            break
                c_j.append(cand)
            elif featureTypes[j] == 1:
                accepted = False
                while not accepted:
                    mu = random.uniform(min_mu, max_mu)
                    sigma = random.uniform(min_sigma, max_sigma)
                    cand = NormalDistribution(mu, sigma)
                    accepted = True
                    for d in c_j:
                        KL_dist = sym_kl_dist(d, cand)
                        if KL_dist > KL_upper or KL_dist < KL_lower:
                            accepted = False
                            # One violation already rejects the candidate;
                            # stop comparing, as the discrete branch does.
                            break
                c_j.append(cand)
            else:
                # Was a bare `RuntimeError` expression, which silently did
                # nothing; an unknown feature type is an internal error.
                raise RuntimeError('Unknown feature type in getRandomMixture: %r'
                                   % (featureTypes[j],))
        C.append(c_j)

    # C is indexed [feature][component]; components need [component][feature].
    comps = []
    for i in range(G):
        comps.append(ProductDistribution([C[j][i] for j in range(p)]))

    pi = get_random_pi(G, 0.1)

    m = MixtureModel(G, pi, comps, struct=1)
    m.updateFreeParams()

    return m
# Tail of the fifth component (n21..n25 and nlist are defined above this
# excerpt).
d9 = MultinomialDistribution(1, 4, [0.21, 0.27, 0.27, 0.25], alphabet=DIAG)
d10 = MultinomialDistribution(1, 4, [0.7, 0.1, 0.1, 0.1], alphabet=DIAG)
mark5 = NormalDistribution(80, 0.1)

pd5 = ProductDistribution([n21, n22, n23, n24, n25, d9, d10, mark5] + nlist)

# Sixth component: five normal features, two multinomial features and a
# `mark` normal feature, followed by the shared nlist features.
n26 = NormalDistribution(4.0, 1.0)
n27 = NormalDistribution(2.50, 0.490)
n28 = NormalDistribution(2.52, 0.495)
n29 = NormalDistribution(5.52, 0.495)
n30 = NormalDistribution(-4.95, 0.5)
d11 = MultinomialDistribution(1, 4, [0.21, 0.27, 0.27, 0.25], alphabet=DIAG)
d12 = MultinomialDistribution(1, 4, [0.7, 0.1, 0.1, 0.1], alphabet=DIAG)
mark6 = NormalDistribution(100, 0.1)

pd6 = ProductDistribution([n26, n27, n28, n29, n30, d11, d12, mark6] + nlist)

# Six-component mixture built with struct=1 (pd..pd4 are defined above this
# excerpt); 500 samples are drawn from it and then used to update its
# structure.
mix = MixtureModel(6, [0.1, 0.1, 0.1, 0.2, 0.2, 0.3], [pd, pd2, pd3, pd4, pd5, pd6], struct=1)

data = mix.sampleDataSet(500)

# print mix

mix.updateStructureGlobal(data)

# print mix
# print mix.groups
# print mix.leaders

# writeMixture(mix, "test.mix")
# mix.evalStructure(data.headers)