def getRandomCSIMixture_conditionalDists(G, p, KL_lower, KL_upper, M=8, dtypes='discgauss', seed=None, fullstruct=False, disc_sampling_dist=None):
    """
    Generate a random Bayesian mixture model with a CSI (context-specific
    independence) structure over its features.

    For every feature, the per-group component parameters are rejection-sampled
    so that the symmetric KL divergence between any two accepted group
    parameters lies within [KL_lower, KL_upper].

    @param G: number of mixture components
    @param p: number of features
    @param KL_lower: lower bound on pairwise symmetric KL divergence
    @param KL_upper: upper bound on pairwise symmetric KL divergence
    @param M: alphabet size of discrete features
    @param dtypes: feature types - 'disc' (all discrete), 'gauss' (all Normal)
        or 'discgauss' (each feature discrete or Normal, chosen uniformly)
    @param seed: currently unused; the seeding code was disabled upstream
    @param fullstruct: if True, use the full structure (every component forms
        its own group) for all features
    @param disc_sampling_dist: prior used to sample discrete distribution
        parameters; defaults to a uniform DirichletPrior(M, [1.0] * M)

    @return: mixture.BayesMixtureModel with leaders/groups structure assigned

    @raise RuntimeError: if no sufficiently separated parameters are found
        within the maximum number of tries, or an invalid feature type is
        encountered.
    """
    if disc_sampling_dist is None:
        discSamp = mixture.DirichletPrior(M, [1.0] * M)  # uniform sampling
    else:
        discSamp = disc_sampling_dist

    # parameter ranges for randomly drawn Normal distributions
    min_sigma = 0.3   # minimal std for Normal
    max_sigma = 5.0   # maximal std for Normal
    min_mu = -25.0    # minimal mean
    max_mu = 25.0     # maximal mean

    assert dtypes in ['disc', 'gauss', 'discgauss']

    if dtypes == 'disc':
        featureTypes = [0] * p
    elif dtypes == 'gauss':
        featureTypes = [1] * p
    elif dtypes == 'discgauss':
        # discrete or Normal features for now, chosen uniformly
        # 0 discrete, 1 Normal
        featureTypes = [random.choice((0, 1)) for i in range(p)]
    else:
        raise TypeError

    # generate random CSI structures
    if G < 15:
        P = setPartitions.generate_all_partitions(G)  # XXX too slow for large G

    C = []
    leaders = []
    groups = []
    for j in range(p):
        c_j = {}
        leaders_j = []
        groups_j = {}

        if fullstruct:
            # full structure: every component is its own singleton group
            struct_j = [(i, ) for i in range(G)]
        elif G < 15:
            struct_j = random.choice(P)
        else:
            # enumerating all partitions is infeasible for large G; fall back
            # to a (not uniformly distributed) random partition
            print('WARNING: improper structure sampling !')
            struct_j = setPartitions.get_random_partition(G)

        for i, grp in enumerate(struct_j):
            lg = list(grp)
            lgj = lg.pop(0)  # first group member acts as the group leader
            leaders_j.append(lgj)
            groups_j[lgj] = lg

            max_tries = 100000
            tries = 0
            if featureTypes[j] == 0:
                # rejection-sample a discrete distribution whose symmetric KL
                # distance to every previously accepted parameter lies in
                # [KL_lower, KL_upper]
                acc = 0
                while acc == 0:
                    cand = discSamp.sample()
                    acc = 1
                    for d in c_j:
                        KL_dist = mixture.sym_kl_dist(c_j[d], cand)
                        if KL_dist > KL_upper or KL_dist < KL_lower:
                            acc = 0
                            tries += 1
                            break
                    if tries >= max_tries:
                        raise RuntimeError('Failed to find separated parameters !')
                # assign the accepted candidate to every component in the group
                for cind in grp:
                    c_j[cind] = cand
            elif featureTypes[j] == 1:
                # same rejection sampling for a randomly parameterized Normal
                acc = 0
                while acc == 0:
                    mu = random.uniform(min_mu, max_mu)
                    sigma = random.uniform(min_sigma, max_sigma)
                    cand = mixture.NormalDistribution(mu, sigma)
                    acc = 1
                    for d in c_j:
                        KL_dist = mixture.sym_kl_dist(c_j[d], cand)
                        if KL_dist > KL_upper or KL_dist < KL_lower:
                            acc = 0
                            tries += 1
                            break
                    if tries >= max_tries:
                        raise RuntimeError('Failed to find separated parameters !')
                for cind in grp:
                    c_j[cind] = cand
            else:
                # BUGFIX: was a bare 'RuntimeError' expression (a no-op) -
                # invalid feature types were silently ignored; now raised
                raise RuntimeError('Invalid feature type: ' + str(featureTypes[j]))

        leaders.append(leaders_j)
        groups.append(groups_j)
        C.append(c_j)

    # assemble the component product distributions from the per-feature dicts
    comps = []
    for i in range(G):
        comps.append(mixture.ProductDistribution([C[j][i] for j in range(p)]))
    pi = get_random_pi(G, 0.3 / G)

    # create prior
    piprior = mixture.DirichletPrior(G, [2.0] * G)
    cprior = []
    for j in range(p):
        if featureTypes[j] == 0:
            cprior.append(mixture.DirichletPrior(M, [1.02] * M))
        elif featureTypes[j] == 1:
            # dummy parameters, to be set later
            cprior.append(mixture.NormalGammaPrior(0, 0, 0, 0))
        else:
            # BUGFIX: was a bare 'RuntimeError' expression (a no-op); now raised
            raise RuntimeError('Invalid feature type: ' + str(featureTypes[j]))

    mprior = mixture.MixtureModelPrior(0.1, 0.1, piprior, cprior)

    m = mixture.BayesMixtureModel(G, pi, comps, mprior, struct=1)
    m.leaders = leaders
    m.groups = groups

    m.identifiable()
    m.updateFreeParams()

    return m
# creating component distributions
c1 = mixture.ProductDistribution([n11, n12, d13, d14])
c2 = mixture.ProductDistribution([n21, n22, d23, d24])

# setting up a Dirichlet mixture prior with two components
piPr = mixture.DirichletPrior(
    2, [1.0, 1.0])  # uniform prior of mixture coefficients
dPrior = [
    mixture.DirichletPrior(4, [1.3, 1.6, 1.1, 4.0]),
    mixture.DirichletPrior(4, [3.1, 1.2, 1.1, 1.0])
]
dmixPrior = mixture.DirichletMixturePrior(2, 4, [0.5, 0.5], dPrior)

# assembling the model prior: Normal-Gamma priors for the two Normal
# features, the Dirichlet mixture prior for the two discrete features
compPrior = [
    mixture.NormalGammaPrior(1.5, 0.1, 3.0, 1.0),
    mixture.NormalGammaPrior(-2.0, 0.1, 3.0, 1.0),
    dmixPrior,
    dmixPrior
]

# putting together the prior for the whole mixture
prior = mixture.MixtureModelPrior(0.03, 0.03, piPr, compPrior)

# intializing Bayesian mixture model
pi = [0.4, 0.6]
m = mixture.BayesMixtureModel(2, pi, [c1, c2], prior)

# BUGFIX: converted Python-2-only print statements to the function form,
# which behaves identically for single arguments and also parses in Python 3
print("Initial parameters")
print(m)

# Now that the model is complete we can start using it.
# sampling data
data = m.sampleDataSet(600)
n32 = mixture.NormalDistribution(-3.0, 0.5) d33 = mixture.DiscreteDistribution(4, [0.1, 0.1, 0.1, 0.7]) d34 = mixture.DiscreteDistribution(4, [0.6, 0.1, 0.2, 0.1]) # creating component distributions c1 = mixture.ProductDistribution([n11, n12, d13, d14]) c2 = mixture.ProductDistribution([n21, n22, d23, d24]) c3 = mixture.ProductDistribution([n31, n32, d33, d34]) # setting up the mixture prior piPr = mixture.DirichletPrior( 3, [1.0, 1.0, 1.0]) # uniform prior of mixture coefficients # conjugate priors over the atomar distributions - Normal-Gamma for Normal distribution, Dirichlet for the discrete distribution compPrior = [ mixture.NormalGammaPrior(1.5, 0.01, 3.0, 1.0), mixture.NormalGammaPrior(-2.0, 0.01, 3.0, 1.0), mixture.DirichletPrior(4, [1.01, 1.01, 1.01, 1.01]), mixture.DirichletPrior(4, [1.01, 1.01, 1.01, 1.01]) ] # putting together the mixture prior prior = mixture.MixtureModelPrior(0.03, 0.03, piPr, compPrior) N = 400 prior.structPriorHeuristic(0.01, N) # intializing Bayesian mixture model pi = [0.3, 0.3, 0.4] m = labeledBayesMixture.labeledBayesMixtureModel(3, pi, [c1, c2, c3], prior,
td44 = mixture.DiscreteDistribution(4, [0.25] * 4) tc4 = mixture.ProductDistribution([tn41, tn42, tn43, td44]) tn51 = mixture.NormalDistribution(4.0, 0.5) tn52 = mixture.NormalDistribution(-6.0, 0.5) tn53 = mixture.NormalDistribution(1.0, 0.5) td54 = mixture.DiscreteDistribution(4, [0.25] * 4) tc5 = mixture.ProductDistribution([tn51, tn52, tn53, td54]) tpi = [0.3, 0.2, 0.2, 0.2, 0.1] # the hyperparameter of the NormalGamma distributions are # estimated heuristically in .setParams(...) sp1 = mixture.NormalGammaPrior(1.0, 1.0, 1.0, 1.0) sp1.setParams(data.getInternalFeature(0), 5) sp2 = mixture.NormalGammaPrior(1.0, 1.0, 1.0, 1.0) sp2.setParams(data.getInternalFeature(1), 5) sp3 = mixture.NormalGammaPrior(1.0, 1.0, 1.0, 1.0) sp3.setParams(data.getInternalFeature(2), 5) sp4 = mixture.DirichletPrior(4, [1.02] * 4) pipr = mixture.DirichletPrior(5, [1.0] * 5) # the hyperparameter alpha is chosen based on the heuristic below delta = 0.1 structPrior = 1.0 / (1.0 + delta)**data.N # creating the model prior prior = mixture.MixtureModelPrior(structPrior, 0.03, pipr,