Esempio n. 1
0
    def testem(self):
        """Run EM on a data set that combines scalar samples and HMM sequences.

        The data come from the generating models built in setUp; EM is then
        started from a separately parameterised mixture. The test passes when
        EM runs to completion without raising.
        """
        def make_hmm(transitions, emissions, start):
            # Every HMM in this test uses the same IntegerRange(0, 4) alphabet.
            symbols = mixtureHMM.ghmm.IntegerRange(0, 4)
            emission_domain = mixtureHMM.ghmm.DiscreteDistribution(
                mixtureHMM.ghmm.IntegerRange(0, 4))
            return mixtureHMM.getHMM(symbols, emission_domain,
                                     transitions, emissions, start)

        # Scalar part of the data, drawn from the HMM-free sampling mixture.
        scalar_samples = self.gen.sampleSet(100)

        # Sequence part: samples from both generating HMMs pooled in one set.
        sequences = self.h1.hmm.sample(40, 10)
        sequences.merge(self.h2.hmm.sample(60, 10))

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM(scalar_samples, [sequences])
        data.internalInit(self.m)

        # Starting model for EM.
        first_hmm = make_hmm(
            [[0.5, 0.2, 0.3], [0.2, 0.3, 0.5], [0.1, 0.5, 0.4]],
            [[0.2, 0.4, 0.1, 0.3], [0.5, 0.1, 0.2, 0.2],
             [0.4, 0.3, 0.15, 0.15]],
            [0.3, 0.3, 0.4])
        second_hmm = make_hmm(
            [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]],
            [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4],
             [0.2, 0.1, 0.6, 0.1]],
            [0.3, 0.4, 0.3])

        first_normal = NormalDistribution(-1.5, 1.5)
        second_normal = NormalDistribution(9.0, 1.2)

        first_mult = MultinomialDistribution(
            3, 4, [0.1, 0.1, 0.55, 0.25], alphabet=self.DIAG)
        second_mult = MultinomialDistribution(
            3, 4, [0.4, 0.3, 0.1, 0.2], alphabet=self.DIAG)

        start_components = [
            ProductDistribution([first_normal, first_mult, first_hmm]),
            ProductDistribution([second_normal, second_mult, second_hmm]),
        ]
        start_model = MixtureModel(2, [0.7, 0.3], start_components)

        start_model.EM(data, 80, 0.1, silent=1)
Esempio n. 2
0
    def setUp(self):
        """Build the generating models shared by the tests.

        Sets ``self.DIAG`` (alphabet for the multinomial features),
        ``self.h1`` / ``self.h2`` (HMMs), ``self.m`` (mixture whose components
        combine a Normal, a multinomial and an HMM feature) and ``self.gen``
        (mixture over the Normal and multinomial features only, used for
        sampling).
        """
        # building generating models
        self.DIAG = Alphabet(['.', '0', '8', '1'])

        A = [[0.3, 0.6, 0.1], [0.0, 0.5, 0.5], [0.4, 0.2, 0.4]]
        B = [[0.5, 0.2, 0.1, 0.2], [0.5, 0.4, 0.05, 0.05], [0.8, 0.1, 0.05, 0.05]]
        pi = [1.0, 0.0, 0.0]
        self.h1 = mixtureHMM.getHMM(mixtureHMM.ghmm.IntegerRange(0, 4), mixtureHMM.ghmm.DiscreteDistribution(mixtureHMM.ghmm.IntegerRange(0, 4)), A, B, pi)

        A2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        # Each emission row must sum to one; the second row used to read
        # [0.1, 0.1, 0.4, 0.5], which sums to 1.1.
        B2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.2, 0.3, 0.3]]
        pi2 = [0.6, 0.4, 0.0]
        self.h2 = mixtureHMM.getHMM(mixtureHMM.ghmm.IntegerRange(0, 4), mixtureHMM.ghmm.DiscreteDistribution(mixtureHMM.ghmm.IntegerRange(0, 4)), A2, B2, pi2)

        n1 = NormalDistribution(2.5, 0.5)
        n2 = NormalDistribution(6.0, 0.8)

        mult1 = MultinomialDistribution(3, 4, [0.23, 0.26, 0.26, 0.25], alphabet=self.DIAG)
        mult2 = MultinomialDistribution(3, 4, [0.7, 0.1, 0.1, 0.1], alphabet=self.DIAG)

        c1 = ProductDistribution([n1, mult1, self.h1])
        c2 = ProductDistribution([n2, mult2, self.h2])

        mpi = [0.4, 0.6]
        self.m = MixtureModel(2, mpi, [c1, c2])

        # mixture for sampling: same scalar features, no HMM feature
        gc1 = ProductDistribution([n1, mult1])
        gc2 = ProductDistribution([n2, mult2])
        self.gen = MixtureModel(2, mpi, [gc1, gc2])
Esempio n. 3
0
def parseMix(fileHandle, mtype, G, pi, compFix, leaders=None, groups=None):
    """
    Parses a flat file for a mixture model. Internal function, is invoked from
    readMixture.

    @param fileHandle: handle passed to parseFile for each component (and for
        the prior of the Bayesian model types)
    @param mtype: model type tag: 'Mix', 'labelBayesMix' or 'BayesMix'
    @param G: number of components to parse
    @param pi: mixture weights, handed to the model constructor
    @param compFix: component fix flags, handed to the model constructor
    @param leaders: optional CSI structure leaders, indexed by feature
    @param groups: optional CSI structure groups, indexed by feature and leader

    @return: the constructed mixture model

    @raise TypeError: if mtype is not one of the known tags
    @raise IOError: if distributions grouped by the CSI structure differ
    """
    components = []
    while len(components) < G:
        components.append(parseFile(fileHandle))

    if mtype == 'Mix':
        from pymix.models.mixture import MixtureModel
        m = MixtureModel(G, pi, components, compFix=compFix)

    elif mtype == 'labelBayesMix':
        from pymix.models.labeled_bayes import labeledBayesMixtureModel
        # The prior follows the components in the file.
        prior = parseFile(fileHandle)
        if sum(compFix) > 0:  # XXX pass compFix if it is not trivial
            m = labeledBayesMixtureModel(G,
                                         pi,
                                         components,
                                         prior,
                                         compFix=compFix)
        else:
            m = labeledBayesMixtureModel(G, pi, components, prior)

    elif mtype == 'BayesMix':
        from pymix.models.bayes import BayesMixtureModel
        prior = parseFile(fileHandle)
        if sum(compFix) > 0:  # XXX pass compFix if it is not trivial
            m = BayesMixtureModel(G, pi, components, prior, compFix=compFix)
        else:
            m = BayesMixtureModel(G, pi, components, prior)

    else:
        raise TypeError('Unknown mixture type in parseMix: %r' % (mtype,))

    if leaders and groups:
        m.initStructure()
        m.leaders = leaders
        m.groups = groups
        for i in range(m.dist_nr):
            for lead in m.leaders[i]:
                for g in m.groups[i][lead]:
                    # Deliberately `not ... == ...`: only equality is relied
                    # on, not a separate inequality operator.
                    if not m.components[lead][i] == m.components[g][i]:
                        raise IOError('Incompatible CSI structure and parameter values in parseMix.')
                    # Group members share the leader's distribution object.
                    m.components[g][i] = m.components[lead][i]
    return m
Esempio n. 4
0
    def setUp(self):
        """Build the generating models shared by the tests.

        Sets ``self.DIAG`` (alphabet for the multinomial features),
        ``self.h1`` / ``self.h2`` (HMMs), ``self.m`` (mixture whose components
        combine a Normal, a multinomial and an HMM feature) and ``self.gen``
        (mixture over the Normal and multinomial features only, used for
        sampling).
        """
        # building generating models
        self.DIAG = Alphabet(['.', '0', '8', '1'])

        A = [[0.3, 0.6, 0.1], [0.0, 0.5, 0.5], [0.4, 0.2, 0.4]]
        B = [[0.5, 0.2, 0.1, 0.2], [0.5, 0.4, 0.05, 0.05],
             [0.8, 0.1, 0.05, 0.05]]
        pi = [1.0, 0.0, 0.0]
        self.h1 = mixtureHMM.getHMM(
            mixtureHMM.ghmm.IntegerRange(0, 4),
            mixtureHMM.ghmm.DiscreteDistribution(
                mixtureHMM.ghmm.IntegerRange(0, 4)), A, B, pi)

        A2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        # Each emission row must sum to one; the second row used to read
        # [0.1, 0.1, 0.4, 0.5], which sums to 1.1.
        B2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.2, 0.3, 0.3]]
        pi2 = [0.6, 0.4, 0.0]
        self.h2 = mixtureHMM.getHMM(
            mixtureHMM.ghmm.IntegerRange(0, 4),
            mixtureHMM.ghmm.DiscreteDistribution(
                mixtureHMM.ghmm.IntegerRange(0, 4)), A2, B2, pi2)

        n1 = NormalDistribution(2.5, 0.5)
        n2 = NormalDistribution(6.0, 0.8)

        mult1 = MultinomialDistribution(3,
                                        4, [0.23, 0.26, 0.26, 0.25],
                                        alphabet=self.DIAG)
        mult2 = MultinomialDistribution(3,
                                        4, [0.7, 0.1, 0.1, 0.1],
                                        alphabet=self.DIAG)

        c1 = ProductDistribution([n1, mult1, self.h1])
        c2 = ProductDistribution([n2, mult2, self.h2])

        mpi = [0.4, 0.6]
        self.m = MixtureModel(2, mpi, [c1, c2])

        # mixture for sampling: same scalar features, no HMM feature
        gc1 = ProductDistribution([n1, mult1])
        gc2 = ProductDistribution([n2, mult2])
        self.gen = MixtureModel(2, mpi, [gc1, gc2])
Esempio n. 5
0
    def testememptylist(self):
        """Run EM on a data set that holds HMM sequences only.

        The scalar part handed to fromGHMM is an empty list. The test passes
        when EM runs to completion without raising.
        """
        def make_hmm(transitions, emissions, start):
            # Every HMM in this test uses the same IntegerRange(0, 4) alphabet.
            symbols = mixtureHMM.ghmm.IntegerRange(0, 4)
            emission_domain = mixtureHMM.ghmm.DiscreteDistribution(
                mixtureHMM.ghmm.IntegerRange(0, 4))
            return mixtureHMM.getHMM(symbols, emission_domain,
                                     transitions, emissions, start)

        # Samples from both generating HMMs pooled in one sequence set.
        sequences = self.h1.hmm.sample(40, 10)
        sequences.merge(self.h2.hmm.sample(60, 10))

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM([], [sequences])

        # Starting model for EM: one HMM feature per component.
        first_hmm = make_hmm(
            [[0.5, 0.2, 0.3], [0.2, 0.3, 0.5], [0.1, 0.5, 0.4]],
            [[0.2, 0.4, 0.1, 0.3], [0.5, 0.1, 0.2, 0.2],
             [0.4, 0.3, 0.15, 0.15]],
            [0.3, 0.3, 0.4])
        second_hmm = make_hmm(
            [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]],
            [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4],
             [0.2, 0.1, 0.6, 0.1]],
            [0.3, 0.4, 0.3])

        start_components = [
            ProductDistribution([first_hmm]),
            ProductDistribution([second_hmm]),
        ]
        start_model = MixtureModel(2, [0.4, 0.6], start_components)

        data.internalInit(start_model)

        start_model.EM(data, 40, 0.1, silent=1)
Esempio n. 6
0
def parseMix(fileHandle, mtype, G, pi, compFix, leaders=None, groups=None):
    """
    Parses a flat file for a mixture model. Internal function, is invoked from
    readMixture.

    @param fileHandle: handle passed to parseFile for each component (and for
        the prior of the Bayesian model types)
    @param mtype: model type tag: 'Mix', 'labelBayesMix' or 'BayesMix'
    @param G: number of components to parse
    @param pi: mixture weights, handed to the model constructor
    @param compFix: component fix flags, handed to the model constructor
    @param leaders: optional CSI structure leaders, indexed by feature
    @param groups: optional CSI structure groups, indexed by feature and leader

    @return: the constructed mixture model

    @raise TypeError: if mtype is not one of the known tags
    @raise IOError: if distributions grouped by the CSI structure differ
    """
    components = []
    while len(components) < G:
        components.append(parseFile(fileHandle))

    if mtype == 'Mix':
        from pymix.models.mixture import MixtureModel
        m = MixtureModel(G, pi, components, compFix=compFix)

    elif mtype == 'labelBayesMix':
        from pymix.models.labeled_bayes import labeledBayesMixtureModel
        # The prior follows the components in the file.
        prior = parseFile(fileHandle)
        if sum(compFix) > 0: # XXX pass compFix if it is not trivial
            m = labeledBayesMixtureModel(G, pi, components, prior, compFix=compFix)
        else:
            m = labeledBayesMixtureModel(G, pi, components, prior)

    elif mtype == 'BayesMix':
        from pymix.models.bayes import BayesMixtureModel
        prior = parseFile(fileHandle)
        if sum(compFix) > 0: # XXX pass compFix if it is not trivial
            m = BayesMixtureModel(G, pi, components, prior, compFix=compFix)
        else:
            m = BayesMixtureModel(G, pi, components, prior)

    else:
        raise TypeError('Unknown mixture type in parseMix: %r' % (mtype,))

    if leaders and groups:
        m.initStructure()
        m.leaders = leaders
        m.groups = groups
        for i in range(m.dist_nr):
            for lead in m.leaders[i]:
                for g in m.groups[i][lead]:
                    # Deliberately `not ... == ...`: only equality is relied
                    # on, not a separate inequality operator.
                    if not m.components[lead][i] == m.components[g][i]:
                        raise IOError('Incompatible CSI structure and parameter values in parseMix.')
                    # Group members share the leader's distribution object.
                    m.components[g][i] = m.components[lead][i]
    return m
Esempio n. 7
0
    def testinternalinitcomplexempty(self):
        """Check internalInit on a data set that holds HMM sequences only.

        Initialising against ``self.m`` must raise AssertionError; initialising
        against a mixture with a single HMM feature must succeed and yield one
        complex feature.
        """
        # Samples from both generating HMMs pooled in one sequence set.
        sequences = self.h1.hmm.sample(40, 10)
        sequences.merge(self.h2.hmm.sample(60, 10))

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM([], [sequences])

        # self.m has three features per component, the data set does not.
        self.assertRaises(AssertionError, data.internalInit, self.m)

        hmm_only_components = [
            ProductDistribution([self.h1]),
            ProductDistribution([self.h2]),
        ]
        hmm_only_model = MixtureModel(2, [0.4, 0.6], hmm_only_components)

        data.internalInit(hmm_only_model)

        self.assertEqual(str(data.complexFeature), '[1]')
        self.assertEqual(data.p, 1)
        self.assertEqual(data.suff_p, 1)
Esempio n. 8
0
def _within_kl_bounds(cand, accepted, KL_lower, KL_upper):
    """Return True if the symmetric KL distance between cand and every
    distribution in accepted lies within [KL_lower, KL_upper]."""
    for d in accepted:
        KL_dist = sym_kl_dist(d, cand)
        if KL_dist > KL_upper or KL_dist < KL_lower:
            return False
    return True


def getRandomMixture(G,
                     p,
                     KL_lower,
                     KL_upper,
                     dtypes='discgauss',
                     M=4,
                     seed=None):
    """
    Builds a random mixture model with G components over p features.

    For every feature, the G component distributions are drawn by rejection
    sampling: a candidate is kept only if its symmetric KL distance to each
    distribution already accepted for that feature lies within
    [KL_lower, KL_upper]. There is no retry limit, so bounds that cannot be
    satisfied make this function loop forever.

    @param G: number of mixture components
    @param p: number of features
    @param KL_lower: smallest accepted pairwise symmetric KL distance
    @param KL_upper: largest accepted pairwise symmetric KL distance
    @param dtypes: 'disc' (all discrete), 'gauss' (all Normal) or 'discgauss'
        (each feature discrete or Normal, chosen uniformly at random)
    @param M: alphabet size of the discrete distributions
    @param seed: currently ignored; the seeding code was disabled

    @return: MixtureModel built with struct=1, after updateFreeParams()

    @raise TypeError: if dtypes is not one of the supported values
    """
    min_sigma = 0.1  # minimal std for Normal
    max_sigma = 1.0  # maximal std for Normal
    min_mu = -5.0  # minimal mean
    max_mu = 8.0  # maximal mean

    # Feature type codes: 0 discrete, 1 Normal.
    if dtypes == 'disc':
        featureTypes = [0] * p
    elif dtypes == 'gauss':
        featureTypes = [1] * p
    elif dtypes == 'discgauss':
        # discrete or Normal features for now, chosen uniformly
        featureTypes = [random.choice((0, 1)) for i in range(p)]
    else:
        raise TypeError('Unknown dtypes value: %r' % (dtypes,))

    # C[j][i] is the distribution of feature j in component i.
    C = []
    for j in range(p):
        c_j = []
        for i in range(G):
            while True:
                if featureTypes[j] == 0:
                    cand = DiscreteDistribution(M, random_vector(M))
                elif featureTypes[j] == 1:
                    mu = random.uniform(min_mu, max_mu)
                    sigma = random.uniform(min_sigma, max_sigma)
                    cand = NormalDistribution(mu, sigma)
                else:
                    # Previously a bare `RuntimeError` expression that was
                    # never raised.
                    raise RuntimeError(
                        'Invalid feature type: %r' % (featureTypes[j],))

                if _within_kl_bounds(cand, c_j, KL_lower, KL_upper):
                    break

            c_j.append(cand)

        C.append(c_j)

    comps = []
    for i in range(G):
        comps.append(ProductDistribution([C[j][i] for j in range(p)]))

    pi = get_random_pi(G, 0.1)

    m = MixtureModel(G, pi, comps, struct=1)
    m.updateFreeParams()

    return m
Esempio n. 9
0
    p2.append(random.random())
    p3.append(random.random())
    p4.append(random.random())

# Normalise the four random weight vectors so that each sums to one. The totals
# are computed once, up front, so the result does not depend on `map` being
# eager or on when a lambda looks up the list it divides by.
total1 = sum(p1)
p1 = [x / total1 for x in p1]

total2 = sum(p2)
p2 = [x / total2 for x in p2]

total3 = sum(p3)
p3 = [x / total3 for x in p3]

total4 = sum(p4)
p4 = [x / total4 for x in p4]

# First component: two multinomial and two Normal features.
mult = MultinomialDistribution(6, 25, p1, SNP)
mult2 = MultinomialDistribution(7, 25, p2, SNP)
phi = NormalDistribution(11.0, 4.0)
phi2 = NormalDistribution(11.0, 6.0)
pd1 = ProductDistribution([mult, mult2, phi, phi2])

# Second component. It was previously built from mult, mult2, phi and phi2
# again, which made it identical to pd1 and left the four distributions below
# unused.
mult3 = MultinomialDistribution(6, 25, p3, SNP)
mult4 = MultinomialDistribution(7, 25, p4, SNP)
phi3 = NormalDistribution(8.0, 5.0)
phi4 = NormalDistribution(15.0, 5.0)
pd2 = ProductDistribution([mult3, mult4, phi3, phi4])

m = MixtureModel(2, [0.5, 0.5], [pd1, pd2])
m.EM(d, 15, 0.05)
Esempio n. 10
0
# Generating model: 3 components, each a product of 10 multinomial features
# with random parameter vectors normalised to sum to one. Totals are computed
# once per vector rather than inside a lambda evaluated for every element.
for j in range(3):
    dList = []
    for i in range(10):
        par = [random(), random(), random(), random(), random(), random()]
        total = sum(par)
        par = [x / total for x in par]

        dList.append(MultinomialDistribution(6, 6, par))

    pdList.append(ProductDistribution(dList))

# Random mixture weights, normalised the same way.
piList = [random(), random(), random()]
piTotal = sum(piList)
piList = [x / piTotal for x in piList]

mix = MixtureModel(3, piList, pdList)

dat = mix.sampleDataSet(1000)

# Second, independently drawn set of components with the same layout.
pdList2 = []
for j in range(3):
    dList2 = []
    for i in range(10):
        par2 = [random(), random(), random(), random(), random(), random()]
        total2 = sum(par2)
        par2 = [x / total2 for x in par2]

        dList2.append(MultinomialDistribution(6, 6, par2))

    pdList2.append(ProductDistribution(dList2))
Esempio n. 11
0
# Mixture components over three features each: a Normal, a multinomial and an
# HMM (n1/n2, mult1/mult2 and h1/h2 are defined earlier in the script).
c1 = ProductDistribution([n1, mult1, h1])
c2 = ProductDistribution([n2, mult2, h2])

mpi = [0.4, 0.6]
m = MixtureModel(2, mpi, [c1, c2])

#print m
#print "-->",m.components[0].suff_dataRange


# ----------- constructing complex DataSet ----------------

# mixture for sampling: same weights and scalar features as m, but without the
# HMM feature
gc1 = ProductDistribution([n1, mult1])
gc2 = ProductDistribution([n2, mult2])
gen = MixtureModel(2, mpi, [gc1, gc2])

dat = gen.sampleSet(100)
#print dat

# sampling hmm data from both HMMs
seq1 = h1.hmm.sample(40, 10)
seq2 = h2.hmm.sample(60, 10)

# pool the samples of both HMMs in seq1
seq1.merge(seq2)

data = mixtureHMM.SequenceDataSet()


#data.fromGHMM(dat,[seq1])
# combine the scalar samples with the pooled sequence set
data.fromGHMM(dat, [seq1])
Esempio n. 12
0
from pymix.distributions.normal import NormalDistribution
from pymix.distributions.product import ProductDistribution
from pymix.models.mixture import MixtureModel
from pymix.util.dataset import DataSet

# Two mixture components, each a product of three Normal features.
pr1 = ProductDistribution([NormalDistribution(-6.0, 0.5), NormalDistribution(-4.0, 0.5), NormalDistribution(-3.0, 0.5)])
pr2 = ProductDistribution([NormalDistribution(-5.0, 0.5), NormalDistribution(-3.3, 0.5), NormalDistribution(-2.3, 0.5)])

m = MixtureModel(2, [0.7, 0.3], [pr1, pr2])

# Draw five samples from the mixture.
seq = m.sampleSet(5)

#print seq
z = 0
# NOTE(review): this passes the return value of fromList to printTraceback, so
# it relies on fromList returning the DataSet rather than None -- confirm.
m.printTraceback(DataSet().fromList(seq), z)
Esempio n. 13
0
# Second HMM, built from the matrices A2, B2 and pi2 defined earlier in the
# script.
h2 = mixtureHMM.getHMM(
    mixtureHMM.ghmm.IntegerRange(0, 4),
    mixtureHMM.ghmm.DiscreteDistribution(mixtureHMM.ghmm.IntegerRange(0, 4)),
    A2, B2, pi2)

n1 = NormalDistribution(2.5, 0.5)
n2 = NormalDistribution(6.0, 0.8)

mult1 = MultinomialDistribution(3, 4, [0.23, 0.26, 0.26, 0.25], alphabet=DIAG)
mult2 = MultinomialDistribution(3, 4, [0.7, 0.1, 0.1, 0.1], alphabet=DIAG)

# Mixture components over three features each: a Normal, a multinomial and an
# HMM.
c1 = ProductDistribution([n1, mult1, h1])
c2 = ProductDistribution([n2, mult2, h2])

mpi = [0.4, 0.6]
m = MixtureModel(2, mpi, [c1, c2])

#print m
#print "-->",m.components[0].suff_dataRange

# ----------- constructing complex DataSet ----------------

# mixture for sampling: same weights and scalar features as m, but without the
# HMM feature
gc1 = ProductDistribution([n1, mult1])
gc2 = ProductDistribution([n2, mult2])
gen = MixtureModel(2, mpi, [gc1, gc2])

dat = gen.sampleSet(100)
#print dat

# sampling hmm data
Esempio n. 14
0
# Fifth component: five Normal, two multinomial and one marker Normal feature,
# followed by the shared features in nlist (defined earlier in the script).
d10 = MultinomialDistribution(1, 4, [0.7, 0.1, 0.1, 0.1], alphabet=DIAG)
mark5 = NormalDistribution(80, 0.1)
pd5 = ProductDistribution([n21, n22, n23, n24, n25, d9, d10, mark5] + nlist)

# Sixth component, same feature layout.
n26 = NormalDistribution(4.0, 1.0)
n27 = NormalDistribution(2.50, 0.490)
n28 = NormalDistribution(2.52, 0.495)
n29 = NormalDistribution(5.52, 0.495)
n30 = NormalDistribution(-4.95, 0.5)
d11 = MultinomialDistribution(1, 4, [0.21, 0.27, 0.27, 0.25], alphabet=DIAG)
d12 = MultinomialDistribution(1, 4, [0.7, 0.1, 0.1, 0.1], alphabet=DIAG)
mark6 = NormalDistribution(100, 0.1)
pd6 = ProductDistribution([n26, n27, n28, n29, n30, d11, d12, mark6] + nlist)

# Six-component mixture; struct=1 is presumably what enables the structure
# update below -- confirm against MixtureModel.
mix = MixtureModel(6, [0.1, 0.1, 0.1, 0.2, 0.2, 0.3],
                   [pd, pd2, pd3, pd4, pd5, pd6],
                   struct=1)

# Sample 500 points from the model, then run the structure update on them.
data = mix.sampleDataSet(500)
#print mix

mix.updateStructureGlobal(data)

#print mix
#print mix.groups
#print mix.leaders

#writeMixture(mix, "test.mix")

#mix.evalStructure(data.headers)
Esempio n. 15
0
# Normalise the random parameter vectors so that each sums to one. The totals
# are computed once, up front, so the result does not depend on `map` being
# eager or on when a lambda looks up the list it divides by.
pTotal = sum(p)
p = [x / pTotal for x in p]

p2Total = sum(p2)
p2 = [x / p2Total for x in p2]

p5Total = sum(p5)
p5 = [x / p5Total for x in p5]


multi = MultinomialDistribution(80, N, p)
multi2 = MultinomialDistribution(80, N, p2)
multi5 = MultinomialDistribution(80, N, p5)

mix = MixtureModel(3, [0.5, 0.25, 0.25], [multi, multi2, multi5])
print(mix)

# Draw 1000 samples together with their labels.
[true, s] = mix.labelled_sample(1000)

# Second set of random parameter vectors. The three lists are filled in
# lockstep so the order of random() calls stays p3, p4, p6 per index.
p3 = []
p4 = []
p6 = []
for i in range(N):
    p3.append(random())
    p4.append(random())
    p6.append(random())

p3Total = sum(p3)
p3 = [x / p3Total for x in p3]
Esempio n. 16
0
#
# m2 = MixtureModel(3, [0.2, 0.4, 0.4],
#     [NormalDistribution(-3.5, 0.5),
#         NormalDistribution(0.5, 1.5),
#         NormalDistribution(4.0, 0.6)
#     ])
#
# m2.randParams(seq)
# t1 = clock()
# m2.EM(seq, 40, 0.0)
# t2 = clock()
# print "time = ", t2 - t1
# print m2

#  ----------------------------- Example 2 -----------------------------
# Generating mixture of one Normal and one Exponential component.
e1 = MixtureModel(2, [0.7, 0.3], [NormalDistribution(0.0, 0.4), ExponentialDistribution(0.5)])

seq2 = e1.sample(500)

# Second mixture with different weights and parameters; the EM call that would
# fit it to seq2 is disabled below.
e2 = MixtureModel(2, [0.5, 0.5], [NormalDistribution(2.0, 0.4), ExponentialDistribution(0.1)])

# e2.EM(seq2,60,5)


#  ----------------------------- Example 3 -----------------------------
# Generating mixture of two Normal components.
m3 = MixtureModel(2, [0.3, 0.7], [NormalDistribution(0.0, 0.5), NormalDistribution(1.3, 0.5)])

# Sample together with labels; `true` receives the first element of the
# returned pair, `seq3` the second.
(true, seq3) = m3.sampleSetLabels(380)

# Second two-Normal mixture with equal weights and wider components.
m4 = MixtureModel(2, [0.5, 0.5], [NormalDistribution(-1.5, 1.5), NormalDistribution(1.5, 1.5)])
Esempio n. 17
0
    random()
]
# Normalise the random mixture weights so that they sum to one. The total is
# computed once instead of inside a lambda evaluated for every element.
piTotal = sum(piList)
piList = [x / piTotal for x in piList]

# Ten multinomial components with random, normalised parameter vectors.
for i in range(10):
    par = [random(), random(), random()]
    total = sum(par)
    par = [x / total for x in par]

    dList.append(MultinomialDistribution(6, 3, par))

# NOTE(review): the first argument is 1000 although the loop above appends only
# ten components, and sample() is called without a size. The count and the
# sample size may have been swapped -- confirm against the MixtureModel API.
mix = MixtureModel(1000, piList, dList)

s = mix.sample()

dList2 = []
piList2 = [
    random(),
    random(),
    random(),
    random(),
    random(),
    random(),
    random(),
    random(),
    random(),
    random()
Esempio n. 18
0
# m2 = MixtureModel(3, [0.2, 0.4, 0.4],
#     [NormalDistribution(-3.5, 0.5),
#         NormalDistribution(0.5, 1.5),
#         NormalDistribution(4.0, 0.6)
#     ])
#
# m2.randParams(seq)
# t1 = clock()
# m2.EM(seq, 40, 0.0)
# t2 = clock()
# print "time = ", t2 - t1
# print m2

#  ----------------------------- Example 2 -----------------------------
# Generating mixture of one Normal and one Exponential component.
e1 = MixtureModel(2, [0.7, 0.3],
                  [NormalDistribution(0.0, 0.4),
                   ExponentialDistribution(0.5)])

seq2 = e1.sample(500)

# Second mixture with different weights and parameters; the EM call that would
# fit it to seq2 is disabled below.
e2 = MixtureModel(2, [0.5, 0.5],
                  [NormalDistribution(2.0, 0.4),
                   ExponentialDistribution(0.1)])

#e2.EM(seq2,60,5)

#  ----------------------------- Example 3 -----------------------------
# Generating mixture of two Normal components.
m3 = MixtureModel(2, [0.3, 0.7],
                  [NormalDistribution(0.0, 0.5),
                   NormalDistribution(1.3, 0.5)])
Esempio n. 19
0
class HMMTests(FuzzyTestCase):
    """Tests for SequenceDataSet and mixture EM with HMM-valued features."""

    def setUp(self):
        """Build the generating models shared by the tests.

        Sets ``self.DIAG`` (alphabet for the multinomial features),
        ``self.h1`` / ``self.h2`` (HMMs), ``self.m`` (mixture whose components
        combine a Normal, a multinomial and an HMM feature) and ``self.gen``
        (mixture over the Normal and multinomial features only, used for
        sampling).
        """
        self.DIAG = Alphabet(['.', '0', '8', '1'])

        A = [[0.3, 0.6, 0.1], [0.0, 0.5, 0.5], [0.4, 0.2, 0.4]]
        B = [[0.5, 0.2, 0.1, 0.2], [0.5, 0.4, 0.05, 0.05], [0.8, 0.1, 0.05, 0.05]]
        pi = [1.0, 0.0, 0.0]
        self.h1 = self._make_hmm(A, B, pi)

        A2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        # Each emission row must sum to one; the second row used to read
        # [0.1, 0.1, 0.4, 0.5], which sums to 1.1.
        B2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.2, 0.3, 0.3]]
        pi2 = [0.6, 0.4, 0.0]
        self.h2 = self._make_hmm(A2, B2, pi2)

        n1 = NormalDistribution(2.5, 0.5)
        n2 = NormalDistribution(6.0, 0.8)

        mult1 = MultinomialDistribution(3, 4, [0.23, 0.26, 0.26, 0.25], alphabet=self.DIAG)
        mult2 = MultinomialDistribution(3, 4, [0.7, 0.1, 0.1, 0.1], alphabet=self.DIAG)

        c1 = ProductDistribution([n1, mult1, self.h1])
        c2 = ProductDistribution([n2, mult2, self.h2])

        mpi = [0.4, 0.6]
        self.m = MixtureModel(2, mpi, [c1, c2])

        # mixture for sampling: same scalar features, no HMM feature
        gc1 = ProductDistribution([n1, mult1])
        gc2 = ProductDistribution([n2, mult2])
        self.gen = MixtureModel(2, mpi, [gc1, gc2])

    @staticmethod
    def _make_hmm(A, B, pi):
        """Return an HMM over IntegerRange(0, 4) built from the given
        transition matrix, emission matrix and start distribution."""
        return mixtureHMM.getHMM(
            mixtureHMM.ghmm.IntegerRange(0, 4),
            mixtureHMM.ghmm.DiscreteDistribution(
                mixtureHMM.ghmm.IntegerRange(0, 4)), A, B, pi)

    def _start_hmms(self):
        """Return the two HMMs used as starting points for EM."""
        tA = [[0.5, 0.2, 0.3], [0.2, 0.3, 0.5], [0.1, 0.5, 0.4]]
        tB = [[0.2, 0.4, 0.1, 0.3], [0.5, 0.1, 0.2, 0.2], [0.4, 0.3, 0.15, 0.15]]
        tpi = [0.3, 0.3, 0.4]
        th1 = self._make_hmm(tA, tB, tpi)

        tA2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        tB2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.1, 0.6, 0.1]]
        tpi2 = [0.3, 0.4, 0.3]
        th2 = self._make_hmm(tA2, tB2, tpi2)

        return th1, th2

    def _sequence_dataset(self, scalar_data):
        """Return a SequenceDataSet holding scalar_data plus sequences sampled
        from both generating HMMs, pooled in one sequence set."""
        seq1 = self.h1.hmm.sample(40, 10)
        seq2 = self.h2.hmm.sample(60, 10)
        seq1.merge(seq2)

        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM(scalar_data, [seq1])
        return data

    def testinternalinitcomplex(self):
        """internalInit on scalar plus sequence data marks the HMM feature."""
        data = self._sequence_dataset(self.gen.sampleSet(100))
        data.internalInit(self.m)

        self.assertEqual(str(data.complexFeature), '[0, 0, 1]')
        self.assertEqual(data.p, 5)
        self.assertEqual(data.suff_p, 6)

    def testinternalinitcomplexempty(self):
        """internalInit on sequence-only data rejects self.m and accepts a
        mixture with a single HMM feature."""
        data = self._sequence_dataset([])

        self.assertRaises(AssertionError, data.internalInit, self.m)

        c1 = ProductDistribution([self.h1])
        c2 = ProductDistribution([self.h2])

        mpi = [0.4, 0.6]
        hm = MixtureModel(2, mpi, [c1, c2])

        data.internalInit(hm)

        self.assertEqual(str(data.complexFeature), '[1]')
        self.assertEqual(data.p, 1)
        self.assertEqual(data.suff_p, 1)

    def testgetinternalfeature(self):
        """getInternalFeature returns arrays for the scalar features and a
        sequence set for the HMM feature."""
        data = self._sequence_dataset(self.gen.sampleSet(100))
        data.internalInit(self.m)

        f0 = data.getInternalFeature(0)
        self.assertIsInstance(f0, numarray.numarraycore.NumArray)

        f1 = data.getInternalFeature(1)
        self.assertIsInstance(f1, numarray.numarraycore.NumArray)

        f2 = data.getInternalFeature(2)
        self.assertIsInstance(f2, ghmm.SequenceSet)

    def testem(self):
        """EM runs to completion on scalar plus sequence data."""
        data = self._sequence_dataset(self.gen.sampleSet(100))
        data.internalInit(self.m)

        th1, th2 = self._start_hmms()

        tn1 = NormalDistribution(-1.5, 1.5)
        tn2 = NormalDistribution(9.0, 1.2)

        tmult1 = MultinomialDistribution(3, 4, [0.1, 0.1, 0.55, 0.25], alphabet=self.DIAG)
        tmult2 = MultinomialDistribution(3, 4, [0.4, 0.3, 0.1, 0.2], alphabet=self.DIAG)

        tc1 = ProductDistribution([tn1, tmult1, th1])
        tc2 = ProductDistribution([tn2, tmult2, th2])

        tmpi = [0.7, 0.3]
        tm = MixtureModel(2, tmpi, [tc1, tc2])

        tm.EM(data, 80, 0.1, silent=1)

    def testememptylist(self):
        """EM runs to completion on sequence-only data (40 iterations max)."""
        data = self._sequence_dataset([])

        th1, th2 = self._start_hmms()
        c1 = ProductDistribution([th1])
        c2 = ProductDistribution([th2])

        mpi = [0.4, 0.6]
        hm = MixtureModel(2, mpi, [c1, c2])

        data.internalInit(hm)

        hm.EM(data, 40, 0.1, silent=1)

    def testsimpleem(self):
        """EM runs to completion on sequence-only data (80 iterations max)."""
        data = self._sequence_dataset([])

        hmm1, hmm2 = self._start_hmms()
        th1 = ProductDistribution([hmm1])
        th2 = ProductDistribution([hmm2])

        mpi = [0.4, 0.6]
        hm = MixtureModel(2, mpi, [th1, th2])

        data.internalInit(hm)

        hm.EM(data, 80, 0.1, silent=1)
Esempio n. 20
0
        par = [random(), random(), random(),random(), random(), random()]
        f = lambda x: x / sum(par)
        par = map(f,par)


        dList.append( MultinomialDistribution(6,6,par))

    pdList.append( ProductDistribution(dList))


# Random mixture weights, normalised to sum to one. The total is computed once
# instead of inside a lambda evaluated for every element.
piList = [random(), random(), random()]
piTotal = sum(piList)
piList = [x / piTotal for x in piList]


mix = MixtureModel(3, piList, pdList)

dat = mix.sampleDataSet(1000)

# Second, independently drawn set of components: 3 products of 10 multinomial
# features with random, normalised parameter vectors.
pdList2 = []
for j in range(3):
    dList2 = []
    for i in range(10):
        par2 = [random(), random(), random(), random(), random(), random()]
        total2 = sum(par2)
        par2 = [x / total2 for x in par2]

        dList2.append(MultinomialDistribution(6, 6, par2))

    pdList2.append(ProductDistribution(dList2))
Esempio n. 21
0
    items.reverse()
    new_pi = np.array(mixx.pi.tolist() + [0.01])[::-1]
    new_pi = new_pi / np.sum(new_pi)
    #items = items + [MultiNormalDistribution(4, means, sigma)]
    items = items + [MultivariateTDistribution(DIMS, means, sigma, 5)]
    # Fix parameters of all components but the new one:
    #comp_fix = [1] * (len(new_pi) - 1) + [0]
    return MixtureModel(len(new_pi), new_pi, items)


#import ipdb; ipdb.set_trace()
# Initial single component: a multivariate t distribution centred slightly off
# the per-row mean of xy, with the per-row variances on the diagonal.
# NOTE(review): assumes xy is laid out as (dimension, sample), since statistics
# are taken over axis=1 and the transpose is used as the data -- confirm.
st = MultivariateTDistribution(DIMS, xy.mean(axis=1)*1.1, np.diag(xy.var(axis=1)), 3)
da = xy.T #[:500]
ds = DataSet()
ds.fromArray(da)
m = MixtureModel(1, [1], [st])
print m
m.EM(ds, 60, 0.1)
print m
#import ipdb; ipdb.set_trace()
#m2 = mix.MixtureModel(2, [0.8, 0.2], [m.components[0].distList[0], d2], compFix=[0, 0])
# Grow the mixture six times: mixturate returns a model with one more
# component, which is then refitted with randMaxEM.
for _ in xrange(6):
    m = mixturate(m, xy.mean(axis=1)-10., np.diag(xy.var(axis=1)))
    m.randMaxEM(ds, 3, 30, 0.1)
    print m

# Persist the fitted model, then plot the first two data columns.
import joblib
joblib.dump(m, 'test2.mix', compress=3)
pl.plotData(da[:, :2])
col = 'rgbcmyk'
icol = 0
Esempio n. 22
0
from pymix.distributions.normal import NormalDistribution
from pymix.distributions.product import ProductDistribution
from pymix.models.mixture import MixtureModel
from pymix.util.dataset import DataSet

# Two mixture components, each a product of three Normal features.
pr1 = ProductDistribution([
    NormalDistribution(-6.0, 0.5),
    NormalDistribution(-4.0, 0.5),
    NormalDistribution(-3.0, 0.5)
])
pr2 = ProductDistribution([
    NormalDistribution(-5.0, 0.5),
    NormalDistribution(-3.3, 0.5),
    NormalDistribution(-2.3, 0.5)
])

m = MixtureModel(2, [0.7, 0.3], [pr1, pr2])

# Draw five samples from the mixture.
seq = m.sampleSet(5)

#print seq
z = 0
# NOTE(review): this passes the return value of fromList to printTraceback, so
# it relies on fromList returning the DataSet rather than None -- confirm.
m.printTraceback(DataSet().fromList(seq), z)
Esempio n. 23
0
class HMMTests(FuzzyTestCase):
    """Tests for SequenceDataSet and mixture EM with HMM-valued features."""

    @staticmethod
    def _make_hmm(A, B, pi):
        """Build a discrete HMM over IntegerRange(0, 4).

        A is the transition matrix, B the emission matrix and pi the initial
        state distribution, all passed straight through to mixtureHMM.getHMM.
        """
        return mixtureHMM.getHMM(
            mixtureHMM.ghmm.IntegerRange(0, 4),
            mixtureHMM.ghmm.DiscreteDistribution(
                mixtureHMM.ghmm.IntegerRange(0, 4)), A, B, pi)

    def _sample_sequences(self):
        """Sample 40 sequences from h1 and 60 from h2 and merge them."""
        seq1 = self.h1.hmm.sample(40, 10)
        seq2 = self.h2.hmm.sample(60, 10)
        seq1.merge(seq2)
        return seq1

    def _sequence_data(self, scalars):
        """Return a SequenceDataSet holding `scalars` plus sampled sequences.

        `scalars` is handed to fromGHMM unchanged; pass [] for a data set
        that contains HMM sequences only.
        """
        sequences = self._sample_sequences()
        data = mixtureHMM.SequenceDataSet()
        data.fromGHMM(scalars, [sequences])
        return data

    def _start_hmms(self):
        """Return the two HMMs used as starting points for EM training."""
        tA = [[0.5, 0.2, 0.3], [0.2, 0.3, 0.5], [0.1, 0.5, 0.4]]
        tB = [[0.2, 0.4, 0.1, 0.3], [0.5, 0.1, 0.2, 0.2],
              [0.4, 0.3, 0.15, 0.15]]
        tpi = [0.3, 0.3, 0.4]
        th1 = self._make_hmm(tA, tB, tpi)

        tA2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        tB2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4],
               [0.2, 0.1, 0.6, 0.1]]
        tpi2 = [0.3, 0.4, 0.3]
        th2 = self._make_hmm(tA2, tB2, tpi2)

        return th1, th2

    def setUp(self):
        """Create the generating HMMs and mixtures shared by every test."""
        # building generating models
        self.DIAG = Alphabet(['.', '0', '8', '1'])

        A = [[0.3, 0.6, 0.1], [0.0, 0.5, 0.5], [0.4, 0.2, 0.4]]
        B = [[0.5, 0.2, 0.1, 0.2], [0.5, 0.4, 0.05, 0.05],
             [0.8, 0.1, 0.05, 0.05]]
        pi = [1.0, 0.0, 0.0]
        self.h1 = self._make_hmm(A, B, pi)

        A2 = [[0.5, 0.4, 0.1], [0.3, 0.2, 0.5], [0.3, 0.2, 0.5]]
        # Each emission row has to be a probability vector summing to one.
        B2 = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.4, 0.4], [0.2, 0.2, 0.3, 0.3]]
        pi2 = [0.6, 0.4, 0.0]
        self.h2 = self._make_hmm(A2, B2, pi2)

        n1 = NormalDistribution(2.5, 0.5)
        n2 = NormalDistribution(6.0, 0.8)

        mult1 = MultinomialDistribution(3,
                                        4, [0.23, 0.26, 0.26, 0.25],
                                        alphabet=self.DIAG)
        mult2 = MultinomialDistribution(3,
                                        4, [0.7, 0.1, 0.1, 0.1],
                                        alphabet=self.DIAG)

        c1 = ProductDistribution([n1, mult1, self.h1])
        c2 = ProductDistribution([n2, mult2, self.h2])

        mpi = [0.4, 0.6]
        self.m = MixtureModel(2, mpi, [c1, c2])

        # mixture for sampling (scalar features only, no HMM feature)
        gc1 = ProductDistribution([n1, mult1])
        gc2 = ProductDistribution([n2, mult2])
        self.gen = MixtureModel(2, mpi, [gc1, gc2])

    def testinternalinitcomplex(self):
        """internalInit on scalar + HMM data sets complexFeature, p and suff_p."""
        # complex DataSet with HMM sequences
        dat = self.gen.sampleSet(100)
        data = self._sequence_data(dat)
        data.internalInit(self.m)

        self.assertEqual(str(data.complexFeature), '[0, 0, 1]')
        self.assertEqual(data.p, 5)
        self.assertEqual(data.suff_p, 6)

    def testinternalinitcomplexempty(self):
        """internalInit on HMM-only data needs a model with HMM features only."""
        # complex DataSet with HMM sequences only
        data = self._sequence_data([])

        # self.m also expects scalar features, which this data set lacks.
        self.assertRaises(AssertionError, data.internalInit, self.m)

        c1 = ProductDistribution([self.h1])
        c2 = ProductDistribution([self.h2])

        mpi = [0.4, 0.6]
        hm = MixtureModel(2, mpi, [c1, c2])

        data.internalInit(hm)

        self.assertEqual(str(data.complexFeature), '[1]')
        self.assertEqual(data.p, 1)
        self.assertEqual(data.suff_p, 1)

    def testgetinternalfeature(self):
        """getInternalFeature returns arrays for scalars, a SequenceSet for HMMs."""
        # complex DataSet with HMM sequences
        dat = self.gen.sampleSet(100)
        data = self._sequence_data(dat)
        data.internalInit(self.m)

        f0 = data.getInternalFeature(0)
        self.assertIsInstance(f0, numarray.numarraycore.NumArray)

        f1 = data.getInternalFeature(1)
        self.assertIsInstance(f1, numarray.numarraycore.NumArray)

        f2 = data.getInternalFeature(2)
        self.assertIsInstance(f2, mixtureHMM.ghmm.SequenceSet)

    def testem(self):
        """EM runs on a data set mixing scalar features and HMM sequences."""
        # complex DataSet with HMM sequences and scalar data
        dat = self.gen.sampleSet(100)
        data = self._sequence_data(dat)
        data.internalInit(self.m)

        th1, th2 = self._start_hmms()

        tn1 = NormalDistribution(-1.5, 1.5)
        tn2 = NormalDistribution(9.0, 1.2)

        tmult1 = MultinomialDistribution(3,
                                         4, [0.1, 0.1, 0.55, 0.25],
                                         alphabet=self.DIAG)
        tmult2 = MultinomialDistribution(3,
                                         4, [0.4, 0.3, 0.1, 0.2],
                                         alphabet=self.DIAG)

        tc1 = ProductDistribution([tn1, tmult1, th1])
        tc2 = ProductDistribution([tn2, tmult2, th2])

        tmpi = [0.7, 0.3]
        tm = MixtureModel(2, tmpi, [tc1, tc2])

        tm.EM(data, 80, 0.1, silent=1)

    def testememptylist(self):
        """EM runs on a data set that contains HMM sequences only."""
        # complex DataSet with HMM sequences only
        data = self._sequence_data([])

        th1, th2 = self._start_hmms()

        c1 = ProductDistribution([th1])
        c2 = ProductDistribution([th2])

        mpi = [0.4, 0.6]
        hm = MixtureModel(2, mpi, [c1, c2])

        data.internalInit(hm)

        hm.EM(data, 40, 0.1, silent=1)

    def testsimpleem(self):
        """EM runs for 80 iterations at most on HMM-only sequence data."""
        data = self._sequence_data([])

        hmm1, hmm2 = self._start_hmms()
        th1 = ProductDistribution([hmm1])
        th2 = ProductDistribution([hmm2])

        mpi = [0.4, 0.6]
        hm = MixtureModel(2, mpi, [th1, th2])

        data.internalInit(hm)

        hm.EM(data, 80, 0.1, silent=1)
Esempio n. 24
0
def getRandomMixture(G, p, KL_lower, KL_upper, dtypes='discgauss', M=4, seed=None):
    """Build a random mixture whose per-feature components are KL-separated.

    For each of the ``p`` features, ``G`` component distributions are drawn by
    rejection sampling: a candidate is kept only if its symmetric KL distance
    (``sym_kl_dist``) to every component already accepted for that feature is
    neither above ``KL_upper`` nor below ``KL_lower``.

    Args:
        G: number of mixture components.
        p: number of features in each component.
        KL_lower: smallest accepted pairwise symmetric KL distance.
        KL_upper: largest accepted pairwise symmetric KL distance.
        dtypes: 'disc' (all features discrete), 'gauss' (all features Normal)
            or 'discgauss' (type chosen uniformly at random per feature).
        M: alphabet size for the discrete distributions.
        seed: accepted for interface compatibility but currently ignored; the
            code that seeded ``random`` and the GSL generator is disabled.

    Returns:
        A ``MixtureModel`` with ``G`` product components, created with
        ``struct=1`` and with ``updateFreeParams()`` already called.

    Raises:
        TypeError: if ``dtypes`` is not one of the recognised values.

    Note:
        The rejection loop has no retry limit, so KL bounds that cannot be
        satisfied make this function loop forever.
    """
    min_sigma = 0.1    # minimal std for Normal
    max_sigma = 1.0    # maximal std for Normal
    min_mu = -5.0      # minimal mean
    max_mu = 8.0       # maximal mean

    # Feature type codes: 0 = discrete, 1 = Normal.
    if dtypes == 'disc':
        featureTypes = [0] * p
    elif dtypes == 'gauss':
        featureTypes = [1] * p
    elif dtypes == 'discgauss':
        # discrete or Normal features for now, chosen uniformly
        featureTypes = [random.choice((0, 1)) for i in range(p)]
    else:
        raise TypeError("unknown dtypes value: %r" % (dtypes,))

    def draw_discrete():
        # Random discrete distribution over M symbols.
        return DiscreteDistribution(M, random_vector(M))

    def draw_normal():
        # Random Normal; mean is drawn before sigma.
        mu = random.uniform(min_mu, max_mu)
        sigma = random.uniform(min_sigma, max_sigma)
        return NormalDistribution(mu, sigma)

    def is_rejected(cand, accepted):
        # A candidate is rejected as soon as one accepted component is either
        # too far from it or too close to it.
        for d in accepted:
            KL_dist = sym_kl_dist(d, cand)
            if KL_dist > KL_upper or KL_dist < KL_lower:
                return True
        return False

    # C[j][i] is the distribution of feature j in component i.
    C = []
    for j in range(p):
        if featureTypes[j] == 0:
            draw = draw_discrete
        elif featureTypes[j] == 1:
            draw = draw_normal
        else:
            raise RuntimeError("unknown feature type code: %r" % (featureTypes[j],))

        c_j = []
        for i in range(G):
            cand = draw()
            while is_rejected(cand, c_j):
                cand = draw()
            c_j.append(cand)

        C.append(c_j)

    # Transpose: component i takes its j-th feature from C[j][i].
    comps = [ProductDistribution([C[j][i] for j in range(p)]) for i in range(G)]

    pi = get_random_pi(G, 0.1)

    m = MixtureModel(G, pi, comps, struct=1)
    m.updateFreeParams()

    return m
Esempio n. 25
0
# Fifth component: five Normal features (n21..n25, defined earlier in the
# script), two four-symbol multinomial features over the DIAG alphabet, a
# marker Normal located at 80, followed by the shared `nlist` features.
d9 = MultinomialDistribution(1, 4, [0.21, 0.27, 0.27, 0.25], alphabet=DIAG)
d10 = MultinomialDistribution(1, 4, [0.7, 0.1, 0.1, 0.1], alphabet=DIAG)
mark5 = NormalDistribution(80, 0.1)
pd5 = ProductDistribution([n21, n22, n23, n24, n25, d9, d10, mark5] + nlist)

# Sixth component: same layout as the fifth, with its own Normal parameters
# and a marker Normal located at 100.
n26 = NormalDistribution(4.0, 1.0)
n27 = NormalDistribution(2.50, 0.490)
n28 = NormalDistribution(2.52, 0.495)
n29 = NormalDistribution(5.52, 0.495)
n30 = NormalDistribution(-4.95, 0.5)
d11 = MultinomialDistribution(1, 4, [0.21, 0.27, 0.27, 0.25], alphabet=DIAG)
d12 = MultinomialDistribution(1, 4, [0.7, 0.1, 0.1, 0.1], alphabet=DIAG)
mark6 = NormalDistribution(100, 0.1)
pd6 = ProductDistribution([n26, n27, n28, n29, n30, d11, d12, mark6] + nlist)

# Six-component mixture over pd..pd6 with the given weights.
# NOTE(review): struct=1 presumably enables structure learning -- confirm
# against MixtureModel.
mix = MixtureModel(6, [0.1, 0.1, 0.1, 0.2, 0.2, 0.3], [pd, pd2, pd3, pd4, pd5, pd6], struct=1)

# Sample 500 observations from the mixture itself.
data = mix.sampleDataSet(500)
# print mix

# Run global structure learning on the sampled data.
mix.updateStructureGlobal(data)

# print mix
# print mix.groups
# print mix.leaders

# writeMixture(mix, "test.mix")

# mix.evalStructure(data.headers)