def setup2():
    """Build a small two-component mixture model for testing.

    Returns a ``mixture.MixtureModel`` with weights [0.4, 0.6].

    NOTE(review): the original header comment claimed four features
    (two Normal, two discrete), but each component here has only two
    features, and the feature types disagree between components
    (component 1 = [Normal, Normal], component 2 = [Normal, Discrete]).
    Confirm this asymmetry is intentional.
    """
    # Atomic feature distributions (norm_a is shared by both components).
    norm_a = mixture.NormalDistribution(1.0, 1.5)
    norm_b = mixture.NormalDistribution(2.0, 0.5)
    disc_a = mixture.DiscreteDistribution(4, [0.1, 0.4, 0.4, 0.1])

    # Component product distributions.
    comp_one = mixture.ProductDistribution([norm_a, norm_b])
    comp_two = mixture.ProductDistribution([norm_a, disc_a])

    # Mixture weights and final model.
    weights = [0.4, 0.6]
    return mixture.MixtureModel(2, weights, [comp_one, comp_two])
def setup():
    """Build a two-component mixture model over four features.

    Each component is a product of two Normal distributions and two
    4-state discrete distributions; mixture weights are [0.4, 0.6].

    Returns the configured ``mixture.MixtureModel``.
    """
    # Feature distributions for the first component.
    comp1_feats = [
        mixture.NormalDistribution(1.0, 1.5),
        mixture.NormalDistribution(2.0, 0.5),
        mixture.DiscreteDistribution(4, [0.1, 0.4, 0.4, 0.1]),
        mixture.DiscreteDistribution(4, [0.25, 0.25, 0.25, 0.25]),
    ]
    # Feature distributions for the second component.
    comp2_feats = [
        mixture.NormalDistribution(4.0, 0.5),
        mixture.NormalDistribution(-6.0, 0.5),
        mixture.DiscreteDistribution(4, [0.7, 0.1, 0.1, 0.1]),
        mixture.DiscreteDistribution(4, [0.1, 0.1, 0.2, 0.6]),
    ]

    components = [
        mixture.ProductDistribution(comp1_feats),
        mixture.ProductDistribution(comp2_feats),
    ]
    return mixture.MixtureModel(2, [0.4, 0.6], components)
def sample(self,returnType='tuple'):
    """Draw one (mean, variance) pair from this prior.

    The precision is drawn from Gamma(self.shape, self.scale) and the
    variance is its reciprocal; the mean is then drawn from a Normal
    centered on self.mu with standard deviation sqrt(self.tau * variance).

    returnType -- 'tuple' returns the raw (mu, sigma) pair, where sigma
    is the sampled variance; 'object' wraps the draw in a
    mixture.NormalDistribution (which takes a standard deviation, hence
    the sqrt).
    """
    assert returnType in ['tuple','object']
    # Sample the precision, then invert it to obtain the variance.
    precision = random.gammavariate(self.shape, self.scale)
    sigma = 1.0 / precision
    # Mean given the variance: Normal(mu, sqrt(tau * sigma)).
    mu = random.normalvariate(self.mu, math.sqrt(self.tau * sigma))
    if returnType == 'tuple':
        return (mu, sigma)
    elif returnType == 'object':
        return mixture.NormalDistribution(mu, math.sqrt(sigma))
def mixture_model(allele_freq, max_components, p_mean=np.nan, p_std=np.nan, quiet=False): data = mixture.DataSet() data.fromList(allele_freq) distributions = [] for i in xrange(max_components): if np.isnan(p_mean): mean = random() else: mean = p_mean if np.isnan(p_std): std = random() else: std = p_std distributions.append(mixture.NormalDistribution(mean, std)) total_models = [] for i in xrange(max_components): weights = list(np.repeat(1.0 / (i + 1), i + 1)) components = distributions[0:i + 1] model = mixture.MixtureModel(i + 1, weights, components) model.EM(data, 1000, 0.001, silent=quiet) if not quiet: print print model print '------------------------------' total_models.append(model) model_selections = mixture.modelSelection(data, total_models, silent=quiet) best_model = total_models[model_selections[1].index( min(model_selections[1]))] best_model_bic = min(model_selections[1]) labels = best_model.classify(data, silent=1) return best_model, labels, best_model_bic
data = mixture.DataSet() # iq.txt = iq and achievement test fields from pheno.txt # drd4_len.txt = drd4 vntr types, only number of repeats data.fromFiles(["iq.txt", "phys.txt", "drd4_len.txt"]) COMOR = 11 G = 8 components = [] for i in range(G): # intelligence and achivement tests as univariate normal distributions. (TEST) bd_mu = float(random.randint(3, 16)) bd_sigma = random.uniform(1.0, 8.0) missing_bd = mixture.NormalDistribution(-9999.9, 0.00001) dist_bd = mixture.NormalDistribution(bd_mu, bd_sigma) mix_bd = mixture.MixtureModel(2, [0.999, 0.001], [dist_bd, missing_bd], compFix=[0, 2]) voc_mu = float(random.randint(3, 16)) voc_sigma = random.uniform(1.0, 8.0) missing_voc = mixture.NormalDistribution(-9999.9, 0.00001) dist_voc = mixture.NormalDistribution(voc_mu, voc_sigma) mix_voc = mixture.MixtureModel(2, [0.999, 0.001], [dist_voc, missing_voc], compFix=[0, 2]) read_mu = float(random.randint(80, 120)) read_sigma = random.uniform(1.0, 28.0) missing_read = mixture.NormalDistribution(-9999.9, 0.00001) dist_read = mixture.NormalDistribution(read_mu, read_sigma)
# Hand-picked 2-D seed positions (x, y) for the mixture components.
pos = np.asarray([[-27, 1305], [33, 1299], [-36, 1256], [-34, 1237], [27, 1250], [-35, 1213], [-36, 1176], [-38, 1151], [-38, 1044], [12, 1036], [-19, 995], [-37, 962], [-23, 925], [55, 950], [23, 931], [-20, 873], [20, 855], [23, 800], [-7, 781], [-28, 741], [30, 548], [-16, 482], [9, 289], [-31, 272], [-35, 167], [-19, 155], [30, 151], [62, 149], [-35, 110], [-24, 100], [38, 102], [-30, 73], [-24, 49]])
# Total number of component distributions to create (fixed seeds plus
# random extras below).
ndistribs = 50
distribs = []
# One isotropic 2-D product distribution (std 30 per axis) per seed.
for mu in pos:
    #mu = np.random.random(nd)
    #mu = mins + mu * ranges
    xd = pm.NormalDistribution(mu[0], 30)
    yd = pm.NormalDistribution(mu[1], 30)
    distrib = pm.ProductDistribution([xd, yd])
    distribs.append(distrib)
# add some extra random ones
# NOTE(review): nd, mins and ranges are defined outside this excerpt --
# presumably dimensionality and the data's per-axis min/extent; verify.
for i in range(len(pos), ndistribs):
    mu = np.random.random(nd)
    mu = mins + mu * ranges
    xd = pm.NormalDistribution(mu[0], 30)
    yd = pm.NormalDistribution(mu[1], 30)
    distrib = pm.ProductDistribution([xd, yd])
    distribs.append(distrib)
'''
# add background noise distrib, see 2006 Bar-Hillel
#datamu = data.mean(axis=0)
#datasigma = data.std(axis=0)