import mixture


def getBayesModel(G, p, mixPrior=None):
    """
    Constructs a PWM CSI BayesMixtureModel.

    @param G: number of components
    @param p: number of positions of the binding site
    @return: BayesMixtureModel object
    """
    if not mixPrior:
        piPrior = mixture.DirichletPrior(G, [1.0] * G)
        compPrior = []
        for i in range(p):
            compPrior.append(mixture.DirichletPrior(4, [1.02, 1.02, 1.02, 1.02]))
        # arbitrary values of struct and comp parameters. Values should be
        # reset by the user with the structPriorHeuristic method.
        mixPrior = mixture.MixtureModelPrior(0.05, 0.05, piPrior, compPrior)

    DNA = mixture.Alphabet(['A', 'C', 'G', 'T'])
    comps = []
    for i in range(G):
        dlist = []
        for j in range(p):
            phi = mixture.random_vector(4)
            dlist.append(mixture.DiscreteDistribution(4, phi, DNA))
        comps.append(mixture.ProductDistribution(dlist))
    pi = mixture.random_vector(G)

    m = mixture.BayesMixtureModel(G, pi, comps, mixPrior, struct=1)
    return m
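# A minimal usage sketch; the component count, site width, sample size and
# EM settings are arbitrary example values:
m = getBayesModel(4, 8)      # 4 components, binding sites of width 8
data = m.sampleDataSet(500)  # draw a synthetic data set from the model
m.modelInitialization(data)  # randomize parameters before training
m.mapEM(data, 40, 0.1)       # MAP-EM: at most 40 iterations, tolerance 0.1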
import random

import mixture
import setPartitions


def getRandomCSIMixture_conditionalDists(G, p, KL_lower, KL_upper, M=8,
                                         dtypes='discgauss', seed=None,
                                         fullstruct=False,
                                         disc_sampling_dist=None):
    # optionally fix the random seed for reproducible sampling
    if seed is not None:
        random.seed(seed)
        mixture._C_mixextend.set_gsl_rng_seed(seed)

    if disc_sampling_dist is None:
        discSamp = mixture.DirichletPrior(M, [1.0] * M)  # uniform sampling
    else:
        discSamp = disc_sampling_dist

    min_sigma = 0.3   # minimal std for Normal
    max_sigma = 5.0   # maximal std for Normal
    min_mu = -25.0    # minimal mean
    max_mu = 25.0     # maximal mean

    assert dtypes in ['disc', 'gauss', 'discgauss']

    if dtypes == 'disc':
        featureTypes = [0] * p
    elif dtypes == 'gauss':
        featureTypes = [1] * p
    elif dtypes == 'discgauss':
        # discrete or Normal features for now, chosen uniformly
        # 0 discrete, 1 Normal
        featureTypes = [random.choice((0, 1)) for i in range(p)]
    else:
        raise TypeError

    # generate random CSI structures
    if G < 15:
        P = setPartitions.generate_all_partitions(G)  # XXX too slow for large G

    C = []
    leaders = []
    groups = []
    for j in range(p):
        c_j = {}
        leaders_j = []
        groups_j = {}

        if fullstruct:
            struct_j = [(i,) for i in range(G)]
        elif G < 15:
            struct_j = random.choice(P)
        else:
            print 'WARNING: improper structure sampling !'
            struct_j = setPartitions.get_random_partition(G)

        for i, grp in enumerate(struct_j):
            lg = list(grp)
            lgj = lg.pop(0)
            leaders_j.append(lgj)
            groups_j[lgj] = lg

            max_tries = 100000
            tries = 0

            if featureTypes[j] == 0:
                # rejection-sample a discrete distribution whose symmetric KL
                # distance to all previously accepted group parameters lies
                # within [KL_lower, KL_upper]
                acc = 0
                while acc == 0:
                    cand = discSamp.sample()
                    acc = 1
                    for d in c_j:
                        KL_dist = mixture.sym_kl_dist(c_j[d], cand)
                        if KL_dist > KL_upper or KL_dist < KL_lower:
                            acc = 0
                            tries += 1
                            break
                    if tries >= max_tries:
                        raise RuntimeError('Failed to find separated parameters !')
                for cind in grp:
                    c_j[cind] = cand
            elif featureTypes[j] == 1:
                # same rejection sampling for the Normal parameters
                acc = 0
                while acc == 0:
                    mu = random.uniform(min_mu, max_mu)
                    sigma = random.uniform(min_sigma, max_sigma)
                    cand = mixture.NormalDistribution(mu, sigma)
                    acc = 1
                    for d in c_j:
                        KL_dist = mixture.sym_kl_dist(c_j[d], cand)
                        if KL_dist > KL_upper or KL_dist < KL_lower:
                            acc = 0
                            tries += 1
                            break
                    if tries >= max_tries:
                        raise RuntimeError('Failed to find separated parameters !')
                for cind in grp:
                    c_j[cind] = cand
            else:
                raise RuntimeError

        leaders.append(leaders_j)
        groups.append(groups_j)
        C.append(c_j)

    comps = []
    for i in range(G):
        comps.append(mixture.ProductDistribution([C[j][i] for j in range(p)]))
    pi = get_random_pi(G, 0.3 / G)  # helper defined elsewhere in this module

    # create prior
    piprior = mixture.DirichletPrior(G, [2.0] * G)
    cprior = []
    for j in range(p):
        if featureTypes[j] == 0:
            cprior.append(mixture.DirichletPrior(M, [1.02] * M))
        elif featureTypes[j] == 1:
            # dummy parameters, to be set later
            cprior.append(mixture.NormalGammaPrior(0, 0, 0, 0))
        else:
            raise RuntimeError

    mprior = mixture.MixtureModelPrior(0.1, 0.1, piprior, cprior)

    m = mixture.BayesMixtureModel(G, pi, comps, mprior, struct=1)
    m.leaders = leaders
    m.groups = groups

    m.identifiable()
    m.updateFreeParams()
    return m
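# A usage sketch for the generator above; all numeric values are arbitrary
# examples. Candidates are accepted only if their symmetric KL distance to
# all previously sampled group parameters lies in [KL_lower, KL_upper], so
# the bounds control how well separated the components are:
m = getRandomCSIMixture_conditionalDists(3, 4, 0.1, 5.0, M=4,
                                         dtypes='discgauss')
data = m.sampleDataSet(800)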
# uniform prior over the mixture coefficients
piPr = mixture.DirichletPrior(2, [1.0, 1.0])

# conjugate priors over the atomic distributions: Normal-Gamma for the
# Normal distributions, Dirichlet for the discrete distributions
compPrior = [
    mixture.NormalGammaPrior(1.5, 0.1, 3.0, 1.0),
    mixture.NormalGammaPrior(-2.0, 0.1, 3.0, 1.0),
    mixture.DirichletPrior(4, [1.0, 1.0, 1.0, 1.0]),
    mixture.DirichletPrior(4, [1.0, 1.0, 1.0, 1.0])
]

# putting together the mixture prior
prior = mixture.MixtureModelPrior(0.03, 0.03, piPr, compPrior)

# initializing the Bayesian mixture model; c1 and c2 are the two component
# product distributions (two Normal and two discrete features) defined
# earlier in the example
pi = [0.4, 0.6]
m = mixture.BayesMixtureModel(2, pi, [c1, c2], prior, struct=1)

print "Initial parameters"
print m

# Now that the model is complete we can start using it.
# sampling data
data = m.sampleDataSet(600)

# randomize model parameters
m.modelInitialization(data)
print "Randomized parameters"
print m

# parameter training: at most 40 EM iterations, convergence tolerance 0.1
m.mapEM(data, 40, 0.1)
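# After training, hard component assignments for the data can be obtained
# with the classify method (sketch; classify returns one component label
# per sample):
clust = m.classify(data)
print clust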
# two Dirichlet distributions as components of the Dirichlet mixture prior
# (the hyperparameters of the first entry are illustrative placeholders)
dPrior = [
    mixture.DirichletPrior(4, [1.0, 1.0, 1.0, 1.0]),
    mixture.DirichletPrior(4, [3.1, 1.2, 1.1, 1.0])
]
dmixPrior = mixture.DirichletMixturePrior(2, 4, [0.5, 0.5], dPrior)

# assembling the model prior; both discrete features share the Dirichlet
# mixture prior
compPrior = [
    mixture.NormalGammaPrior(1.5, 0.1, 3.0, 1.0),
    mixture.NormalGammaPrior(-2.0, 0.1, 3.0, 1.0),
    dmixPrior,
    dmixPrior
]

# putting together the prior for the whole mixture; piPr as in the
# previous example
prior = mixture.MixtureModelPrior(0.03, 0.03, piPr, compPrior)

# initializing the Bayesian mixture model with the components c1 and c2
# from the previous example
pi = [0.4, 0.6]
m = mixture.BayesMixtureModel(2, pi, [c1, c2], prior)

print "Initial parameters"
print m

# Now that the model is complete we can start using it.
# sampling data
data = m.sampleDataSet(600)

# randomize model parameters
m.modelInitialization(data)
print "Randomized parameters"
print m

# parameter training
m.mapEM(data, 40, 0.1)
import mixture

G = 3  # number of mixture components (example value)

# uniform prior over the mixture coefficients
piPrior = mixture.DirichletPrior(G, [1.0] * G)

# conjugate priors for the four features: Dirichlet for the two discrete
# features, Normal-Gamma for the two Normal features (hyperparameters are
# example values)
compPrior = []
for i in range(2):
    compPrior.append(mixture.DirichletPrior(4, [1.02] * 4))
for i in range(2):
    compPrior.append(mixture.NormalGammaPrior(1.0, 2.0, 3.0, 4.0))

mixPrior = mixture.MixtureModelPrior(0.7, 0.7, piPrior, compPrior)

DNA = mixture.Alphabet(['A', 'C', 'G', 'T'])
comps = []
for i in range(G):
    dlist = []
    for j in range(2):
        phi = mixture.random_vector(4)
        dlist.append(mixture.DiscreteDistribution(4, phi, DNA))
    for j in range(2):
        mu = j + 1.0
        sigma = j + 0.5
        dlist.append(mixture.NormalDistribution(mu, sigma))
    comps.append(mixture.ProductDistribution(dlist))
pi = mixture.random_vector(G)
m = mixture.BayesMixtureModel(G, pi, comps, mixPrior, struct=1)

# write the model to a flat file and read it back
mixture.writeMixture(m, 'test.bmix')
m2 = mixture.readMixture('test.bmix')

print m2
print m2.prior
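# The re-read model is a fully functional BayesMixtureModel; as a quick
# sanity check it can, for instance, be used for sampling:
data = m2.sampleDataSet(100)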
# Normal-Gamma priors over the three Normal features; the initial
# hyperparameters are placeholders, since setParams derives them from the
# data (here for a 5-component model)
sp1 = mixture.NormalGammaPrior(1.0, 1.0, 1.0, 1.0)
sp1.setParams(data.getInternalFeature(0), 5)
sp2 = mixture.NormalGammaPrior(1.0, 1.0, 1.0, 1.0)
sp2.setParams(data.getInternalFeature(1), 5)
sp3 = mixture.NormalGammaPrior(1.0, 1.0, 1.0, 1.0)
sp3.setParams(data.getInternalFeature(2), 5)

# Dirichlet prior over the discrete feature
sp4 = mixture.DirichletPrior(4, [1.02] * 4)

pipr = mixture.DirichletPrior(5, [1.0] * 5)

# the hyperparameter alpha is chosen based on the heuristic below
delta = 0.1
structPrior = 1.0 / (1.0 + delta)**data.N

# creating the model prior
prior = mixture.MixtureModelPrior(structPrior, 0.03, pipr,
                                  [sp1, sp2, sp3, sp4])

# creating the model; tpi and tc1 ... tc5 are the mixture coefficients and
# components defined earlier in the example
tm = mixture.BayesMixtureModel(5, tpi, [tc1, tc2, tc3, tc4, tc5], prior,
                               struct=1)

# call to the learning algorithm
tm.bayesStructureEM(data, 1, 5, 40, 0.1)

# printing out the result of the training. The model should have three
# components with parameters closely matching the generating model.
print "---------------------"
print tm
print tm.leaders
print tm.groups
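# Reading the learned CSI structure (sketch, based on the leaders/groups
# layout used throughout these examples): leaders[j] holds one
# representative component per parameter group of feature j, and
# groups[j][l] lists the other components sharing leader l's parameters.
for j in range(len(tm.leaders)):
    for l in tm.leaders[j]:
        print 'feature', j, ': components', [l] + tm.groups[j][l], 'share parameters'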