Ejemplo n.º 1
0
    def testem(self):
        """Smoke-test EM on a data set mixing scalar features and HMM sequences.

        The scalar part is sampled from ``self.gen`` and the sequence part
        from the two generating HMMs ``self.h1`` and ``self.h2``.  Both are
        combined into one ``SequenceDataSet`` and a freshly parameterised
        two-component mixture is fitted to it with ``EM``.  Nothing is
        asserted; the test only checks that the whole pipeline runs.
        """

        def discrete_hmm(transitions, emissions, initial):
            # Every HMM here shares the same emission domain, IntegerRange(0, 4).
            return mixtureHMM.getHMM(
                mixtureHMM.ghmm.IntegerRange(0, 4),
                mixtureHMM.ghmm.DiscreteDistribution(
                    mixtureHMM.ghmm.IntegerRange(0, 4)),
                transitions, emissions, initial)

        # Scalar features first, then the HMM sequences (sampling order kept).
        scalar_part = self.gen.sampleSet(100)

        sequences = self.h1.hmm.sample(40, 10)
        sequences.merge(self.h2.hmm.sample(60, 10))

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM(scalar_part, [sequences])
        data.internalInit(self.m)

        # Starting parameters for the model to be trained, one entry per
        # mixture component.
        start_hmms = [
            discrete_hmm(
                [[0.5, 0.2, 0.3], [0.2, 0.3, 0.5], [0.1, 0.5, 0.4]],
                [[0.2, 0.4, 0.1, 0.3], [0.5, 0.1, 0.2, 0.2],
                 [0.4, 0.3, 0.15, 0.15]],
                [0.3, 0.3, 0.4]),
            discrete_hmm(
                [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]],
                [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4],
                 [0.2, 0.1, 0.6, 0.1]],
                [0.3, 0.4, 0.3]),
        ]
        start_normals = [
            NormalDistribution(-1.5, 1.5),
            NormalDistribution(9.0, 1.2),
        ]
        start_multinomials = [
            MultinomialDistribution(3, 4, phi, alphabet=self.DIAG)
            for phi in ([0.1, 0.1, 0.55, 0.25], [0.4, 0.3, 0.1, 0.2])
        ]

        components = [
            ProductDistribution([normal, multinomial, hmm])
            for normal, multinomial, hmm in zip(
                start_normals, start_multinomials, start_hmms)
        ]

        trained = MixtureModel(2, [0.7, 0.3], components)
        trained.EM(data, 80, 0.1, silent=1)
Ejemplo n.º 2
0
    def setUp(self):
        """Build the generating models shared by the tests.

        Sets the following attributes:

        * ``self.DIAG`` -- four-symbol alphabet used by the multinomials.
        * ``self.h1``, ``self.h2`` -- three-state discrete HMMs over
          ``IntegerRange(0, 4)``.
        * ``self.m`` -- two-component mixture whose components are products
          of a normal, a multinomial and one of the HMMs.
        * ``self.gen`` -- two-component mixture over the same normal and
          multinomial objects, without the HMMs; used for sampling.
        """
        # building generating models
        self.DIAG = Alphabet(['.', '0', '8', '1'])

        A = [[0.3, 0.6, 0.1], [0.0, 0.5, 0.5], [0.4, 0.2, 0.4]]
        B = [[0.5, 0.2, 0.1, 0.2], [0.5, 0.4, 0.05, 0.05],
             [0.8, 0.1, 0.05, 0.05]]
        pi = [1.0, 0.0, 0.0]
        self.h1 = mixtureHMM.getHMM(
            mixtureHMM.ghmm.IntegerRange(0, 4),
            mixtureHMM.ghmm.DiscreteDistribution(
                mixtureHMM.ghmm.IntegerRange(0, 4)), A, B, pi)

        A2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        # Every emission row has to be a probability distribution, i.e. sum
        # to 1.  The second row used to end in 0.5 (row sum 1.1).
        # NOTE(review): 0.4 is the assumed intended value -- confirm.
        B2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.2, 0.3, 0.3]]
        pi2 = [0.6, 0.4, 0.0]
        self.h2 = mixtureHMM.getHMM(
            mixtureHMM.ghmm.IntegerRange(0, 4),
            mixtureHMM.ghmm.DiscreteDistribution(
                mixtureHMM.ghmm.IntegerRange(0, 4)), A2, B2, pi2)

        n1 = NormalDistribution(2.5, 0.5)
        n2 = NormalDistribution(6.0, 0.8)

        mult1 = MultinomialDistribution(3,
                                        4, [0.23, 0.26, 0.26, 0.25],
                                        alphabet=self.DIAG)
        mult2 = MultinomialDistribution(3,
                                        4, [0.7, 0.1, 0.1, 0.1],
                                        alphabet=self.DIAG)

        # Full model: scalar, multinomial and sequence feature per component.
        c1 = ProductDistribution([n1, mult1, self.h1])
        c2 = ProductDistribution([n2, mult2, self.h2])

        mpi = [0.4, 0.6]
        self.m = MixtureModel(2, mpi, [c1, c2])

        # mixture for sampling: reuses the very same n*/mult* objects and
        # the same weights, but leaves the HMM feature out.
        gc1 = ProductDistribution([n1, mult1])
        gc2 = ProductDistribution([n2, mult2])
        self.gen = MixtureModel(2, mpi, [gc1, gc2])
Ejemplo n.º 3
0
def parseFile(fileHandle):
    """
    Internal function. Parses flat files.

    Reads one header line from `fileHandle`, splits it on ';' and uses the
    keyword in the second field to decide which object to build.  Simple
    keywords are converted directly from the remaining fields; the composite
    keywords ("Mix", "Prod", "MixPrior", "DirichMixPrior") pass `fileHandle`
    on to the matching parse* helper.

    Parameters:
        fileHandle: object providing readline(), positioned at a header line.

    Returns:
        The object constructed for the keyword found in the header line.

    Raises:
        TypeError: the keyword is not one of the supported ones.
        IndexError: the line has fewer than two ';'-separated fields.
        ValueError: the number of fields does not match what the keyword
            expects, or a numeric field cannot be converted.
    """
    s = chomp(fileHandle.readline())
    fields = s.split(';')
    keyword = fields[1]

    # The distribution/prior classes are imported inside their branch, exactly
    # where they are needed, rather than at module level.
    if keyword == "Mix":
        _offset, head, G, pi, compFix = fields
        return parseMix(fileHandle, head, int(G), simple_eval(pi),
                        simple_eval(compFix))
    elif keyword == "Norm":
        from pymix.distributions.normal import NormalDistribution
        _offset, _head, mu, sigma = fields
        return NormalDistribution(float(mu), float(sigma))
    elif keyword == "Exp":
        from pymix.distributions.exponential import ExponentialDistribution
        _offset, _head, lambd = fields
        return ExponentialDistribution(float(lambd))
    elif keyword == "Mult":
        from pymix.distributions.multinomial import MultinomialDistribution
        _offset, _head, N, M, phi, alphabet, parFix = fields
        alph = Alphabet(simple_eval(alphabet))
        return MultinomialDistribution(int(N), int(M), simple_eval(phi), alph,
                                       simple_eval(parFix))
    elif keyword == "Discrete":
        from pymix.distributions.discrete import DiscreteDistribution
        _offset, _head, M, phi, alphabet, parFix = fields
        alph = Alphabet(simple_eval(alphabet))
        return DiscreteDistribution(int(M), simple_eval(phi), alph,
                                    simple_eval(parFix))
    elif keyword == "MultiNormal":
        from pymix.distributions.multinormal import MultiNormalDistribution
        _offset, _head, p, mu, sigma = fields
        # XXX the tokenize package used in simple_eval cannot deal with negative values in
        # mu or sigma. A hack solution to that would be to change simple_eval to a direct
        # call to eval in the line below. This carries all the usual implications for security.
        return MultiNormalDistribution(int(p), simple_eval(mu),
                                       simple_eval(sigma))
    elif keyword == "Dirichlet":
        from pymix.distributions.dirichlet import DirichletDistribution
        _offset, _head, M, alpha = fields
        return DirichletDistribution(int(M), simple_eval(alpha))
    elif keyword == "DirichletPr":
        from pymix.priors.dirichlet import DirichletPrior
        _offset, _head, M, alpha = fields
        return DirichletPrior(int(M), simple_eval(alpha))
    elif keyword == "NormalGamma":
        from examples.crp import NormalGammaPrior
        _offset, _head, mu, kappa, dof, scale = fields
        return NormalGammaPrior(float(mu), float(kappa), float(dof),
                                float(scale))
    # Disabled keyword, kept for reference:
    # elif keyword == "PriorForDirichlet":
    #     [offset, head, M, eta] = fields
    #     return PriorForDirichletDistribution(int(M), simple_eval(eta))
    elif keyword == "Prod":
        _offset, _head, p = fields
        return parseProd(fileHandle, int(p))
    elif keyword == "MixPrior":
        # Example header: ;MixPrior;4;0.7;0.7
        _offset, _head, nr_dist, structPrior, nrCompPrior = fields
        return parseMixPrior(fileHandle, int(nr_dist), float(structPrior),
                             float(nrCompPrior))
    elif keyword == "DirichMixPrior":
        # Example header: ;DirichMixPrior;3;5;[ 0.3  0.3  0.4]
        _offset, _head, G, M, pi = fields
        return parseDirichletMixPrior(fileHandle, int(G), int(M),
                                      simple_eval(pi))
    else:
        # Call form of raise: valid on Python 2 and Python 3 alike.
        raise TypeError("Unknown keyword: " + str(keyword))
Ejemplo n.º 4
0
# Draw N unnormalised random weights for each of the three multinomials.
# The draws are interleaved, so the order of random() calls is part of the
# behaviour and is kept as is.
for i in range(N):
    p.append(random())
    p2.append(random())
    p5.append(random())

# Normalise each weight vector so it sums to 1.  The total is computed once
# per vector (not once per element) and the result is a real list, so this
# does not depend on map() being eager.
p_total = sum(p)
p = [w / p_total for w in p]

p2_total = sum(p2)
p2 = [w / p2_total for w in p2]

p5_total = sum(p5)
p5 = [w / p5_total for w in p5]


multi = MultinomialDistribution(80,N,p)
multi2 = MultinomialDistribution(80,N,p2)
multi5 = MultinomialDistribution(80,N,p5)

# Generating model: three multinomial components with fixed weights.
mix = MixtureModel(3,[0.5,0.25,0.25],[multi,multi2,multi5])
print(mix)

# labelled_sample returns a two-element result, unpacked here as `true` and
# `s`.  presumably (true labels, sampled data) -- TODO confirm.
[true, s] = mix.labelled_sample(1000)

# Second batch of unnormalised random weight vectors.
p3 = []
p4 = []
p6 = []
for i in range(N):
    p3.append(random())
    p4.append(random())
    p6.append(random())
Ejemplo n.º 5
0
    p2.append(random.random())
    p3.append(random.random())
    p4.append(random.random())

# Normalise the four random weight vectors so each sums to 1.  Each total is
# computed once and the result is a real list, so this does not depend on
# map() being eager.
p1_total = sum(p1)
p1 = [w / p1_total for w in p1]

p2_total = sum(p2)
p2 = [w / p2_total for w in p2]

p3_total = sum(p3)
p3 = [w / p3_total for w in p3]

p4_total = sum(p4)
p4 = [w / p4_total for w in p4]

# First mixture component: two multinomial and two normal features.
mult = MultinomialDistribution(6, 25, p1, SNP)
mult2 = MultinomialDistribution(7, 25, p2, SNP)
phi = NormalDistribution(11.0, 4.0)
phi2 = NormalDistribution(11.0, 6.0)
pd1 = ProductDistribution([mult, mult2, phi, phi2])

# Second mixture component.  It must be built from its own distributions
# (mult3, mult4, phi3, phi4); reusing the first component's objects would
# make both components identical and share their parameter objects.
mult3 = MultinomialDistribution(6, 25, p3, SNP)
mult4 = MultinomialDistribution(7, 25, p4, SNP)
phi3 = NormalDistribution(8.0, 5.0)
phi4 = NormalDistribution(15.0, 5.0)
pd2 = ProductDistribution([mult3, mult4, phi3, phi4])

# Fit the two-component mixture to the data set `d` (defined earlier).
m = MixtureModel(2, [0.5, 0.5], [pd1, pd2])
m.EM(d, 15, 0.05)
Ejemplo n.º 6
0
from random import random
from pymix.util.times.timer import Timer
from pymix.distributions.multinomial import MultinomialDistribution
from pymix.distributions.product import ProductDistribution
from pymix.models.mixture import MixtureModel

# Build three product distributions, each made of ten multinomials whose
# six parameters are random weights normalised to sum to 1.
pdList = []
for j in range(3):
    dList = []
    for i in range(10):
        par = [random() for _ in range(6)]
        # Sum once, then divide; the result is a real list, so this does not
        # depend on map() being eager.
        par_total = sum(par)
        par = [w / par_total for w in par]

        dList.append(MultinomialDistribution(6, 6, par))

    pdList.append(ProductDistribution(dList))

# Random mixture weights, normalised the same way.
piList = [random(), random(), random()]
pi_total = sum(piList)
piList = [w / pi_total for w in piList]

mix = MixtureModel(3, piList, pdList)

# Sample the data set from the generating mixture.
dat = mix.sampleDataSet(1000)

pdList2 = []
for j in range(3):
    dList2 = []
    for i in range(10):
        par2 = [random(), random(), random(), random(), random(), random()]
Ejemplo n.º 7
0
#seq = h1.hmm.sample(10,50)
#print seq

# Second generating HMM: three states, emissions over IntegerRange(0, 4).
A2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
# Every emission row has to be a probability distribution, i.e. sum to 1.
# The second row used to end in 0.5 (row sum 1.1).
# NOTE(review): 0.4 is the assumed intended value -- confirm.
B2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.2, 0.3, 0.3]]
pi2 = [0.6, 0.4, 0.0]
h2 = mixtureHMM.getHMM(
    mixtureHMM.ghmm.IntegerRange(0, 4),
    mixtureHMM.ghmm.DiscreteDistribution(mixtureHMM.ghmm.IntegerRange(0, 4)),
    A2, B2, pi2)

n1 = NormalDistribution(2.5, 0.5)
n2 = NormalDistribution(6.0, 0.8)

mult1 = MultinomialDistribution(3, 4, [0.23, 0.26, 0.26, 0.25], alphabet=DIAG)
mult2 = MultinomialDistribution(3, 4, [0.7, 0.1, 0.1, 0.1], alphabet=DIAG)

# Full model: each component combines a normal, a multinomial and an HMM
# feature (h1 is defined earlier in the file).
c1 = ProductDistribution([n1, mult1, h1])
c2 = ProductDistribution([n2, mult2, h2])

mpi = [0.4, 0.6]
m = MixtureModel(2, mpi, [c1, c2])

#print m
#print "-->",m.components[0].suff_dataRange

# ----------- constructing complex DataSet ----------------

# mixture for sampling
gc1 = ProductDistribution([n1, mult1])
Ejemplo n.º 8
0
# Single-component mixture over one normal distribution.  In the visible part
# of the script it is only referenced by the disabled EM call below.
m6 = MixtureModel(1, [1.0], [NormalDistribution(-1.5, 2.5)])

# Disabled: EM run of m6 on seq4.
#m6.EM(seq4,1,5)
#print m6

# Disabled: drawing 900 values with random.normalvariate(0.0, 0.5).
#seq5 = numarray.zeros(900,numarray.Float)
#for i in range(900):
#    seq5[i] = random.normalvariate(0.0,0.5)

#print "var = ", variance(seq5)

# -----------------------------  Example 5 -----------------------------

# Generating model: a mixture whose two components are themselves
# single-component mixtures, each wrapping one multinomial.
mc1 = MixtureModel(1, [1.0],
                   [MultinomialDistribution(6, 3, [0.0, 0.25, 0.75])])
mc2 = MixtureModel(1, [1.0], [MultinomialDistribution(6, 3, [0.5, 0.3, 0.2])])

m7 = MixtureModel(2, [0.5, 0.5], [mc1, mc2])

# Sample the training data from the generating model.
seq6 = m7.sampleSet(150)

# Model to be trained: same nested structure, different starting parameters
# and component weights.
mc3 = MixtureModel(1, [1.0], [MultinomialDistribution(6, 3, [0.4, 0.5, 0.1])])
mc4 = MixtureModel(1, [1.0], [MultinomialDistribution(6, 3, [0.2, 0.1, 0.7])])

m8 = MixtureModel(2, [0.1, 0.9], [mc3, mc4])
# presumably 30 is the iteration limit and 0.3 the convergence threshold
# -- TODO confirm against MixtureModel.EM.
m8.EM(seq6, 30, 0.3)
#print m8

# -----------------------------  Example 6 -----------------------------
Ejemplo n.º 9
0
                    ],
                    [
                        3, 3, 5, 6, 4, 5, 4, 4, 4, 1, 3, 4, 5, 4, 1, 4, 4, 3,
                        1, 6, 6, 4, 4, 4, 4
                    ],
                    [
                        6, 4, 7, 4, 6, 4, 3, 4, 5, 1, 4, 6, 5, 4, 1, 5, 4, 4,
                        1, 5, 5, 4, 4, 4, 5
                    ]])

# Disabled debugging aid: call compStructure on G and T, then abort.
#compStructure(G,T)
#raise RuntimeError

# Four-symbol alphabet shared by the multinomial distributions below.
DIAG = Alphabet(['.', '0', '8', '1'])

# Distributions named noise*: a mix of single-draw multinomials over DIAG and
# normals.  presumably uninformative features -- TODO confirm with their use
# further down the script.
noise1 = MultinomialDistribution(1, 4, [0.5, 0.15, 0.15, 0.2], alphabet=DIAG)
noise2 = NormalDistribution(0, 1.0)
noise3 = MultinomialDistribution(1, 4, [0.1, 0.4, 0.4, 0.1], alphabet=DIAG)
noise4 = MultinomialDistribution(1, 4, [0.7, 0.1, 0.1, 0.1], alphabet=DIAG)
noise5 = NormalDistribution(2.5, 0.5)
noise6 = NormalDistribution(9.0, 1.0)
noise7 = NormalDistribution(19.0, 1.0)
noise8 = NormalDistribution(2.0, 1.0)

# NOTE(review): noise8 is defined above but is not part of nlist -- confirm
# whether that is intentional.
nlist = [noise1, noise2, noise3, noise4, noise5, noise6, noise7]

# Normal distributions n1..n5; n1 and n3 are separate objects with the same
# parameters.
n1 = NormalDistribution(2.5, 0.5)
n2 = NormalDistribution(3.2, 0.5)
n3 = NormalDistribution(2.5, 0.5)
n4 = NormalDistribution(-5.5, 0.5)
n5 = NormalDistribution(-3.0, 0.5)