# Shared imports for the snippets below. The final fragment additionally
# assumes `import mixture as pm`, `import wx` and `from pylab import figure, gca`.
import random

import numpy as np

import mixture


def testLymphData():
    # Fit a five component dependence tree mixture to the data in
    # data/ltree2_2fold.txt (11 features per sample).
    k = 5
    d = 11
    models = []
    for i in range(k):
        aux1 = [0] * d
        aux2 = [0] * d
        aux3 = [0] * d
        models.append(mixture.ProductDistribution([mixture.DependenceTreeDistribution(d, aux1, aux2, aux3)]))

    # uniform component weights
    pi = np.array([1.0] * k) / k
    train = mixture.MixtureModel(k, pi, models)

    data = mixture.DataSet()
    data.fromFiles(['data/ltree2_2fold.txt'])

    train.modelInitialization(data)
    train.EM(data, 100, 0.01, silent=1)
def testdtree():
    # generating mixture: two ConditionalGaussDistribution components with
    # different dependence trees (parent index -1 marks the root)
    tree = {}
    tree[0] = -1
    tree[1] = 0
    tree[2] = 1
    n1 = mixture.ProductDistribution([mixture.ConditionalGaussDistribution(3, [0, 1, 0], [0, -0.1, 0.1], [0.5, 0.5, 0.5], tree)])

    tree2 = {}
    tree2[0] = -1
    tree2[1] = 0
    tree2[2] = 0
    n2 = mixture.ProductDistribution([mixture.ConditionalGaussDistribution(3, [-1, 0, 1], [0, 0.1, -0.1], [0.5, 0.5, 0.5], tree2)])

    pi = [0.4, 0.6]
    gen = mixture.MixtureModel(2, pi, [n1, n2])

    random.seed(1)
    data = gen.sampleDataSet(1000)
    print data

    # alternate DependenceTreeDistribution components (unused alternative to
    # the ConditionalGauss training components below):
    #n1 = mixture.ProductDistribution([mixture.DependenceTreeDistribution(3, [0.1, 1.1, 0.1], [0, 0, 0], [1.0, 1.0, 1.0])])
    #n2 = mixture.ProductDistribution([mixture.DependenceTreeDistribution(3, [-1, 0, -0.1], [0, 0, 0], [1.0, 1.0, 1.0])])

    # training model, using the same tree structures as the generator
    n1 = mixture.ProductDistribution([mixture.ConditionalGaussDistribution(3, [0, 1, 0], [0.0, 0.1, 0.1], [0.1, 0.1, 0.1], tree)])
    n2 = mixture.ProductDistribution([mixture.ConditionalGaussDistribution(3, [-1, 0, 1], [0.0, 0.1, 0.1], [0.1, 0.1, 0.1], tree2)])

    train = mixture.MixtureModel(2, pi, [n1, n2])
    train.modelInitialization(data)
    train.EM(data, 100, 0.01, silent=1)
def setup2():
    # Setting up a two component mixture over two features: the first
    # component has two Normal features, the second a Normal and a
    # discrete feature (n1 is shared between both components).

    # initializing atomar distributions
    n1 = mixture.NormalDistribution(1.0, 1.5)
    n2 = mixture.NormalDistribution(2.0, 0.5)
    d1 = mixture.DiscreteDistribution(4, [0.1, 0.4, 0.4, 0.1])

    c1 = mixture.ProductDistribution([n1, n2])
    c2 = mixture.ProductDistribution([n1, d1])

    # initializing mixture
    pi = [0.4, 0.6]
    m = mixture.MixtureModel(2, pi, [c1, c2])
    return m
def getModel(G, p):
    """
    Constructs a PWM MixtureModel.

    @param G: number of components
    @param p: number of positions of the binding site

    @return: MixtureModel object
    """
    DNA = mixture.Alphabet(['A', 'C', 'G', 'T'])
    comps = []
    for i in range(G):
        dlist = []
        for j in range(p):
            phi = mixture.random_vector(4)
            dlist.append(mixture.DiscreteDistribution(4, phi, DNA))
        comps.append(mixture.ProductDistribution(dlist))
    pi = mixture.random_vector(G)
    m = mixture.MixtureModel(G, pi, comps)
    return m
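# A minimal usage sketch for getModel, not part of the original code: sample
# training data from one randomly initialized PWM mixture and re-fit a second
# one by EM. The demo function name and all parameter values are hypothetical;
# only PyMix calls already used elsewhere in this file appear.
def demoGetModel():
    gen = getModel(2, 6)             # 2 motif components, binding site width 6
    data = gen.sampleDataSet(500)    # 500 sampled binding sites
    train = getModel(2, 6)           # fresh random model to be trained
    train.EM(data, 40, 0.1, silent=1)
    return train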
def mixture_model(allele_freq, max_components, p_mean=np.nan, p_std=np.nan, quiet=False):
    data = mixture.DataSet()
    data.fromList(allele_freq)

    # one Normal distribution per potential component; random parameters
    # unless fixed values were supplied
    distributions = []
    for i in xrange(max_components):
        if np.isnan(p_mean):
            mean = random.random()
        else:
            mean = p_mean
        if np.isnan(p_std):
            std = random.random()
        else:
            std = p_std
        distributions.append(mixture.NormalDistribution(mean, std))

    # fit mixtures with 1..max_components components
    total_models = []
    for i in xrange(max_components):
        weights = list(np.repeat(1.0 / (i + 1), i + 1))
        components = distributions[0:i + 1]
        model = mixture.MixtureModel(i + 1, weights, components)
        model.EM(data, 1000, 0.001, silent=quiet)
        if not quiet:
            print
            print model
            print '------------------------------'
        total_models.append(model)

    # select the model with the lowest BIC and classify the data with it
    model_selections = mixture.modelSelection(data, total_models, silent=quiet)
    best_model = total_models[model_selections[1].index(min(model_selections[1]))]
    best_model_bic = min(model_selections[1])
    labels = best_model.classify(data, silent=1)
    return best_model, labels, best_model_bic
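# A usage sketch for mixture_model, not part of the original code: fit 1..3
# component Normal mixtures to a list of allele frequencies and keep the
# lowest-BIC model. The function name and input values are made up for
# illustration.
def demoMixtureModel():
    freqs = [0.12, 0.10, 0.48, 0.52, 0.91, 0.88, 0.50, 0.11, 0.90, 0.49]
    best_model, labels, bic = mixture_model(freqs, 3, quiet=True)
    print 'best BIC:', bic
    print 'cluster labels:', labels
    return best_model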
def getBackgroundModel(p, dist=None):
    """
    Construct background model.

    @param p: number of positions of the binding site
    @param dist: background nucleotide frequencies, uniform is default

    @return: MixtureModel representing the background
    """
    DNA = mixture.Alphabet(['A', 'C', 'G', 'T'])
    dlist = []

    if dist is None:
        phi = [0.25] * 4
    else:
        phi = dist

    for j in range(p):
        dlist.append(mixture.DiscreteDistribution(4, phi, DNA))

    comps = [mixture.ProductDistribution(dlist)]
    m = mixture.MixtureModel(1, [1.0], comps)
    return m
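# A usage sketch pairing getModel with getBackgroundModel over the same site
# width, not part of the original code. The log-odds line assumes PyMix's
# MixtureModel.pdf returns per-sample log-likelihoods; treat that as an
# assumption and drop the line if your version behaves differently.
def demoBackground():
    motif = getModel(2, 6)        # 2-component PWM mixture, width 6
    bg = getBackgroundModel(6)    # uniform background over the same width
    sites = motif.sampleDataSet(100)
    logodds = motif.pdf(sites) - bg.pdf(sites)  # motif vs. background score per site
    return logodds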
def setup():
    # Setting up a two component mixture over four features.
    # Two features are Normal distributions, two discrete.

    # initializing atomar distributions for first component
    n11 = mixture.NormalDistribution(1.0, 1.5)
    n12 = mixture.NormalDistribution(2.0, 0.5)
    d13 = mixture.DiscreteDistribution(4, [0.1, 0.4, 0.4, 0.1])
    d14 = mixture.DiscreteDistribution(4, [0.25, 0.25, 0.25, 0.25])

    # initializing atomar distributions for second component
    n21 = mixture.NormalDistribution(4.0, 0.5)
    n22 = mixture.NormalDistribution(-6.0, 0.5)
    d23 = mixture.DiscreteDistribution(4, [0.7, 0.1, 0.1, 0.1])
    d24 = mixture.DiscreteDistribution(4, [0.1, 0.1, 0.2, 0.6])

    # creating component distributions
    c1 = mixture.ProductDistribution([n11, n12, d13, d14])
    c2 = mixture.ProductDistribution([n21, n22, d23, d24])

    # initializing mixture
    pi = [0.4, 0.6]
    m = mixture.MixtureModel(2, pi, [c1, c2])
    return m
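# A usage sketch for setup, not part of the original code: sample data from
# the mixture it returns and re-estimate a second copy by EM, mirroring the
# generate/train pattern of testdtree above. The function name, sample size
# and EM settings are illustrative.
def demoSetup():
    gen = setup()
    data = gen.sampleDataSet(800)
    train = setup()
    train.modelInitialization(data)   # randomize parameters from the data
    train.EM(data, 60, 0.1, silent=1)
    return train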
# iq.txt       = iq and achievement test fields from pheno.txt
# drd4_len.txt = drd4 vntr types, only number of repeats
data.fromFiles(["iq.txt", "phys.txt", "drd4_len.txt"])

COMOR = 11
G = 8
components = []
for i in range(G):
    # intelligence and achievement tests as univariate normal distributions (TEST);
    # each feature is a two component mixture whose second, parameter-fixed
    # component (compFix=[0, 2]) captures the -9999.9 missing-value code
    bd_mu = float(random.randint(3, 16))
    bd_sigma = random.uniform(1.0, 8.0)
    missing_bd = mixture.NormalDistribution(-9999.9, 0.00001)
    dist_bd = mixture.NormalDistribution(bd_mu, bd_sigma)
    mix_bd = mixture.MixtureModel(2, [0.999, 0.001], [dist_bd, missing_bd], compFix=[0, 2])

    voc_mu = float(random.randint(3, 16))
    voc_sigma = random.uniform(1.0, 8.0)
    missing_voc = mixture.NormalDistribution(-9999.9, 0.00001)
    dist_voc = mixture.NormalDistribution(voc_mu, voc_sigma)
    mix_voc = mixture.MixtureModel(2, [0.999, 0.001], [dist_voc, missing_voc], compFix=[0, 2])

    read_mu = float(random.randint(80, 120))
    read_sigma = random.uniform(1.0, 28.0)
    missing_read = mixture.NormalDistribution(-9999.9, 0.00001)
    dist_read = mixture.NormalDistribution(read_mu, read_sigma)
    mix_read = mixture.MixtureModel(2, [0.999, 0.001], [dist_read, missing_read], compFix=[0, 2])
# Fragment: assumes `data`, `xmin`, `ymin`, `distribs`, `ndistribs`, COLOURS
# and GREY are defined earlier, plus `import mixture as pm`, `import wx` and
# `from pylab import figure, gca`.
xmax, ymax = data.max(axis=0)
#xmean, ymean = np.mean([xmin, xmax]), np.mean([ymin, ymax])
width, height = xmax - xmin, ymax - ymin
xd = pm.UniformDistribution(xmin, xmax)
yd = pm.UniformDistribution(ymin, ymax)
distrib = pm.ProductDistribution([xd, yd])
distribs.append(distrib)

compFix = [0] * ndistribs
compFix[-1] = 1  # flag to make last distrib have fixed params

pmdata = pm.DataSet()
pmdata.fromArray(data)

m = pm.MixtureModel(ndistribs, np.ones(ndistribs) / ndistribs, distribs, compFix=None)
#m.modelInitialization(pmdata) # this hangs? only for multivariate distribs, works fine for productdistribs
posterior, loglikelihood = m.EM(pmdata, 50, 0.1)
#posterior, loglikelihood = m.randMaxEM(pmdata, 20, 100, 0.5, silent=False)
cids = m.classify(pmdata, entropy_cutoff=0.5, silent=True)

# map component ids to plot colours
ncolours = len(COLOURS)
colouris = cids % ncolours
colours = np.asarray(COLOURS)[colouris]
colours[cids == -1] = GREY  # unclassified points

f = figure()
a = gca()
f.canvas.SetBackgroundColour(wx.BLACK)