Esempio n. 1
0
def testLymphData():
    """Fit a 5-component dependence-tree mixture to the lymph data set.

    Builds k ProductDistribution components, each wrapping an
    11-dimensional DependenceTreeDistribution with zeroed parameters,
    loads the data from disk, initializes the model from the data and
    runs EM.
    """
    # Indentation normalized to 4 spaces throughout: the original mixed
    # tabs and spaces, which is a TabError under Python 3.
    k = 5   # number of mixture components
    d = 11  # data dimensionality

    models = []
    for i in range(k):
        # Zero-filled parameter vectors; the real values are assigned by
        # modelInitialization() below, so the contents here are placeholders.
        aux1 = [0] * d
        aux2 = [0] * d
        aux3 = [0] * d
        models.append(mixture.ProductDistribution(
            [mixture.DependenceTreeDistribution(d, aux1, aux2, aux3)]))

    # Uniform component weights.
    pi = np.array([1.0] * k) / k

    train = mixture.MixtureModel(k, pi, models)

    data = mixture.DataSet()
    data.fromFiles(['data/ltree2_2fold.txt'],)

    train.modelInitialization(data)

    train.EM(data, 100, 0.01, silent=1)
Esempio n. 2
0
def testdtree():
    """Sample from a two-component conditional-Gauss mixture, then fit a
    freshly-parameterized mixture to the sample with EM.
    """
    # Indentation normalized to 4 spaces (original mixed tabs and spaces).
    # Dependency tree for the first component: node 0 is the root
    # (parent -1), node 1 depends on 0, node 2 depends on 1 (a chain).
    tree = {}
    tree[0] = -1
    tree[1] = 0
    tree[2] = 1

    n1 = mixture.ProductDistribution([mixture.ConditionalGaussDistribution(
        3, [0, 1, 0], [0, -0.1, 0.1], [0.5, 0.5, 0.5], tree)])

    # Second component's tree: nodes 1 and 2 both depend on root 0 (a star).
    tree2 = {}
    tree2[0] = -1
    tree2[1] = 0
    tree2[2] = 0
    n2 = mixture.ProductDistribution([mixture.ConditionalGaussDistribution(
        3, [-1, 0, 1], [0, 0.1, -0.1], [0.5, 0.5, 0.5], tree2)])

    pi = [0.4, 0.6]
    gen = mixture.MixtureModel(2, pi, [n1, n2])

    random.seed(1)  # make the sampled data set reproducible
    data = gen.sampleDataSet(1000)

    print(data)

    # NOTE: the original code also built two DependenceTreeDistribution
    # components here, but they were immediately overwritten by the
    # assignments below and never used; that dead code has been removed.

    # Fresh, perturbed components used as the EM starting point (the
    # generating model `gen` above is left untouched for comparison).
    n1 = mixture.ProductDistribution([mixture.ConditionalGaussDistribution(
        3, [0, 1, 0], [0.0, 0.1, 0.1], [0.1, 0.1, 0.1], tree)])
    n2 = mixture.ProductDistribution([mixture.ConditionalGaussDistribution(
        3, [-1, 0, 1], [0.0, 0.1, 0.1], [0.1, 0.1, 0.1], tree2)])

    train = mixture.MixtureModel(2, pi, [n1, n2])
    train.modelInitialization(data)
    train.EM(data, 100, 0.01, silent=1)
Esempio n. 3
0
def setup2():
    """Build a two-component mixture over two features per component.

    The first component holds two Normal features; the second shares the
    first Normal and pairs it with a four-state discrete feature.
    """
    # Atomic feature distributions (the first Normal is shared by both
    # components).
    gauss_a = mixture.NormalDistribution(1.0, 1.5)
    gauss_b = mixture.NormalDistribution(2.0, 0.5)
    disc = mixture.DiscreteDistribution(4, [0.1, 0.4, 0.4, 0.1])

    comp_one = mixture.ProductDistribution([gauss_a, gauss_b])
    comp_two = mixture.ProductDistribution([gauss_a, disc])

    # Component weights, then the mixture itself.
    weights = [0.4, 0.6]
    return mixture.MixtureModel(2, weights, [comp_one, comp_two])
Esempio n. 4
0
def getModel(G, p):
    """
    Constructs a PWM MixtureModel.

    @param G: number of components
    @param p: number of positions of the binding site
    @return: MixtureModel object
    """
    alphabet = mixture.Alphabet(['A', 'C', 'G', 'T'])
    # One ProductDistribution per component, each holding p discrete
    # position distributions with randomly drawn nucleotide frequencies.
    comps = [
        mixture.ProductDistribution([
            mixture.DiscreteDistribution(4, mixture.random_vector(4), alphabet)
            for j in range(p)
        ])
        for i in range(G)
    ]
    # Random component weights.
    return mixture.MixtureModel(G, mixture.random_vector(G), comps)
Esempio n. 5
0
def mixture_model(allele_freq,
                  max_components,
                  p_mean=np.nan,
                  p_std=np.nan,
                  quiet=False):
    """Fit Gaussian mixtures of 1..max_components components to allele
    frequencies and pick the best model by the selection score.

    @param allele_freq: list of allele-frequency values to model
    @param max_components: largest number of mixture components to try
    @param p_mean: fixed initial mean for every component; if NaN, a
        random mean is drawn per component instead
    @param p_std: fixed initial standard deviation; same NaN convention
    @param quiet: if True, suppress EM and model-selection output
    @return: (best_model, labels, best_model_bic)
    """
    data = mixture.DataSet()
    data.fromList(allele_freq)

    # One Normal distribution per potential component, with either the
    # caller-supplied parameters or random draws in [0, 1).
    # NOTE(review): `random` is presumably random.random from the stdlib
    # random module — confirm against the file's imports.
    distributions = []
    for i in xrange(max_components):
        if np.isnan(p_mean):
            mean = random()
        else:
            mean = p_mean
        if np.isnan(p_std):
            std = random()
        else:
            std = p_std
        distributions.append(mixture.NormalDistribution(mean, std))

    # Fit one mixture per model size 1..max_components. Model i+1 reuses
    # the first i+1 distribution OBJECTS (shared across models, and
    # mutated in place by each EM run) with uniform starting weights.
    total_models = []
    for i in xrange(max_components):
        weights = list(np.repeat(1.0 / (i + 1), i + 1))
        components = distributions[0:i + 1]
        model = mixture.MixtureModel(i + 1, weights, components)

        model.EM(data, 1000, 0.001, silent=quiet)
        if not quiet:
            print
            print model
            print '------------------------------'
        total_models.append(model)

    # model_selections[1] holds one score per candidate model (lower is
    # better; the naming below treats it as BIC). Pick the minimizer.
    model_selections = mixture.modelSelection(data, total_models, silent=quiet)
    best_model = total_models[model_selections[1].index(
        min(model_selections[1]))]
    best_model_bic = min(model_selections[1])
    # Hard cluster assignment of each data point under the chosen model.
    labels = best_model.classify(data, silent=1)

    return best_model, labels, best_model_bic
Esempio n. 6
0
def getBackgroundModel(p, dist=None):
    """
    Construct background model

    @param p: number of positions of the binding site
    @param dist: background nucleotide frequencies, uniform is default

    @return: MixtureModel representing the background
    """
    DNA = mixture.Alphabet(['A', 'C', 'G', 'T'])

    # Fall back to uniform nucleotide frequencies when no distribution is
    # given. `is None` (not `== None`) is the correct identity test for
    # the default-argument sentinel.
    if dist is None:
        phi = [0.25] * 4
    else:
        phi = dist

    # One identical discrete distribution per binding-site position; note
    # they all share the same `phi` frequency list.
    dlist = []
    for j in range(p):
        dlist.append(mixture.DiscreteDistribution(4, phi, DNA))
    comps = [mixture.ProductDistribution(dlist)]

    # Single-component mixture with weight 1.0.
    m = mixture.MixtureModel(1, [1.0], comps)
    return m
Esempio n. 7
0
def setup():
    """Create a two-component mixture over four features: two Normal
    features and two four-state discrete features per component.
    """
    # Atomic distributions for the first component.
    first_atoms = [
        mixture.NormalDistribution(1.0, 1.5),
        mixture.NormalDistribution(2.0, 0.5),
        mixture.DiscreteDistribution(4, [0.1, 0.4, 0.4, 0.1]),
        mixture.DiscreteDistribution(4, [0.25, 0.25, 0.25, 0.25]),
    ]

    # Atomic distributions for the second component.
    second_atoms = [
        mixture.NormalDistribution(4.0, 0.5),
        mixture.NormalDistribution(-6.0, 0.5),
        mixture.DiscreteDistribution(4, [0.7, 0.1, 0.1, 0.1]),
        mixture.DiscreteDistribution(4, [0.1, 0.1, 0.2, 0.6]),
    ]

    # Wrap each feature list into a component, then assemble the mixture.
    components = [mixture.ProductDistribution(first_atoms),
                  mixture.ProductDistribution(second_atoms)]
    return mixture.MixtureModel(2, [0.4, 0.6], components)
Esempio n. 8
0
# iq.txt = iq and achievement test fields from pheno.txt
# drd4_len.txt = drd4 vntr types, only number of repeats
# NOTE(review): `data` is created earlier in the file (not visible here);
# presumably a mixture.DataSet — confirm upstream.
data.fromFiles(["iq.txt", "phys.txt", "drd4_len.txt"])

COMOR = 11  # NOTE(review): not referenced in the visible lines; presumably used below
G = 8       # number of mixture components to build
components = []
for i in range(G):

    # intelligence and achivement tests as univariate normal distributions. (TEST)
    # Each measurement gets a two-part sub-mixture: a free Normal with
    # randomized starting parameters, plus a fixed near-degenerate Normal
    # pinned at -9999.9 that absorbs missing-value sentinels. compFix=[0, 2]
    # keeps the second (missing-data) component's parameters frozen.
    bd_mu = float(random.randint(3, 16))
    bd_sigma = random.uniform(1.0, 8.0)
    missing_bd = mixture.NormalDistribution(-9999.9, 0.00001)
    dist_bd = mixture.NormalDistribution(bd_mu, bd_sigma)
    mix_bd = mixture.MixtureModel(2, [0.999, 0.001], [dist_bd, missing_bd],
                                  compFix=[0, 2])

    # Vocabulary test: same free-Normal + frozen missing-value pattern.
    voc_mu = float(random.randint(3, 16))
    voc_sigma = random.uniform(1.0, 8.0)
    missing_voc = mixture.NormalDistribution(-9999.9, 0.00001)
    dist_voc = mixture.NormalDistribution(voc_mu, voc_sigma)
    mix_voc = mixture.MixtureModel(2, [0.999, 0.001], [dist_voc, missing_voc],
                                   compFix=[0, 2])

    # Reading test: wider mean/sigma ranges, same missing-value handling.
    read_mu = float(random.randint(80, 120))
    read_sigma = random.uniform(1.0, 28.0)
    missing_read = mixture.NormalDistribution(-9999.9, 0.00001)
    dist_read = mixture.NormalDistribution(read_mu, read_sigma)
    mix_read = mixture.MixtureModel(2, [0.999, 0.001],
                                    [dist_read, missing_read],
                                    compFix=[0, 2])
Esempio n. 9
0
File: gmm.py Progetto: spyke/spyke
xmax, ymax = data.max(axis=0)
#xmean, ymean = np.mean([xmin, xmax]), np.mean([ymin, ymax])
width, height = xmax-xmin, ymax-ymin
xd = pm.UniformDistribution(xmin, xmax)
yd = pm.UniformDistribution(ymin, ymax)
distrib = pm.ProductDistribution([xd, yd])
distribs.append(distrib)
compFix = [0] * ndistribs
compFix[-1] = 1 # flag to make last distrib have fixed params
'''

pmdata = pm.DataSet()
# NOTE(review): `data` is built earlier in the file (not visible here);
# the .max(axis=0) call above suggests a 2-D numpy array — confirm upstream.
pmdata.fromArray(data)

# Equal prior weight on every component; compFix=None leaves all
# component parameters free during EM.
m = pm.MixtureModel(ndistribs,
                    np.ones(ndistribs) / ndistribs,
                    distribs,
                    compFix=None)
#m.modelInitialization(pmdata) # this hangs? only for multivariate distribs, works fine for productdistribs
# EM with at most 50 iterations and convergence tolerance 0.1.
posterior, loglikelihood = m.EM(pmdata, 50, 0.1)
#posterior, loglikelihood = m.randMaxEM(pmdata, 20, 100, 0.5, silent=False)

# Hard-assign each point to a component; per the colour handling below,
# points left unclassified by the entropy cutoff get cid -1.
cids = m.classify(pmdata, entropy_cutoff=0.5, silent=True)

# Map component ids onto the palette, cycling when there are more
# components than colours; unclassified points are drawn grey.
ncolours = len(COLOURS)
colouris = cids % ncolours
colours = np.asarray(COLOURS)[colouris]
colours[cids == -1] = GREY  # unclassified points

# Set up the plot window (wx backend) with a black background.
f = figure()
a = gca()
f.canvas.SetBackgroundColour(wx.BLACK)