from pymix.distributions.normal import NormalDistribution
from pymix.distributions.product import ProductDistribution
from pymix.models.mixture import MixtureModel
from pymix.util.dataset import DataSet

# Demo script: draw a handful of samples from a two-component mixture whose
# components are products of three normal distributions, then call
# MixtureModel.printTraceback on the sampled data.

pr1 = ProductDistribution([
    NormalDistribution(-6.0, 0.5),
    NormalDistribution(-4.0, 0.5),
    NormalDistribution(-3.0, 0.5),
])

pr2 = ProductDistribution([
    NormalDistribution(-5.0, 0.5),
    NormalDistribution(-3.3, 0.5),
    NormalDistribution(-2.3, 0.5),
])

m = MixtureModel(2, [0.7, 0.3], [pr1, pr2])

seq = m.sampleSet(5)
# print seq   (uncomment to inspect the raw samples)

# Populate the DataSet first and pass the object itself. The original passed
# the return value of DataSet().fromList(seq) directly, which only works if
# fromList returns the DataSet.
# NOTE(review): presumably fromList fills the DataSet in place - confirm.
sample_data = DataSet()
sample_data.fromList(seq)

z = 0  # second argument of printTraceback; NOTE(review): meaning not visible here
m.printTraceback(sample_data, z)
import random

from pymix.distributions.multinomial import MultinomialDistribution
from pymix.distributions.normal import NormalDistribution
from pymix.distributions.product import ProductDistribution
from pymix.models.mixture import MixtureModel
from pymix.util.dataset import DataSet
from test_mixture import SNP


def _normalized(weights):
    """Return a new list with *weights* rescaled so the entries sum to one.

    The total is computed a single time up front. The previous lambda-based
    version re-evaluated ``sum(...)`` for every element and read the very
    global it was about to rebind.
    """
    total = sum(weights)
    return [w / total for w in weights]


d = DataSet(["test.txt", "drd1.txt", "pheno.txt"])
print(d)

# Four vectors of 25 uniform random draws each. The draws stay interleaved
# (p1, p2, p3, p4 per iteration) so a seeded run yields the same values as
# before.
p1 = []
p2 = []
p3 = []
p4 = []
for _ in range(25):
    p1.append(random.random())
    p2.append(random.random())
    p3.append(random.random())
    p4.append(random.random())

# Turn the raw draws into weight vectors that sum to one.
# NOTE(review): p4 is filled above but not normalized within this excerpt.
p1 = _normalized(p1)
p2 = _normalized(p2)
p3 = _normalized(p3)
from pymix import mixture
import random

from pymix.parse import readMixture
from pymix.util.alphabet import Alphabet
from pymix.util.dataset import DataSet

# Symbol alphabets declared for this data set.
VNTR = Alphabet([
    '.', '2/4', '2/7', '3/4', '3/7', '4/4',
    '4/6', '4/7', '4/8', '4/9', '7/7',
])
DIAG = Alphabet(['.', '0', '8', '1'])

# iq.txt = iq and achievement test fields from pheno.txt
# drd4_len.txt = drd4 vntr types, only number of repeats
data = DataSet()
data.fromFiles(["filt_WISC_WIAT_DISC_134.txt"])  # ,"DRD4_134_len.txt"

m = readMixture('pheno_best.py')

# Run the same randMaxEM configuration twice on the same model: first with
# tilt=0, then with tilt=1. The printed headings label the two runs.
for heading, tilt_flag in (
        ("Without deterministic anealing:", 0),
        ("\nWith deterministic annealing:", 1),
):
    print(heading)
    m.randMaxEM(data, 100, 30, 0.1, tilt=tilt_flag, silent=0)
#e2.EM(seq2,60,5)
# ----------------------------- Example 3 -----------------------------
# Sample labelled data from a known two-component normal mixture (m3), then
# fit a deliberately mis-specified mixture (m4) to that sample.
m3 = MixtureModel(
    2,
    [0.3, 0.7],
    [NormalDistribution(0.0, 0.5), NormalDistribution(1.3, 0.5)],
)
# NOTE(review): the result is unpacked as (labels, samples) - confirm that
# sampleSetLabels returns its values in this order.
(true, seq3) = m3.sampleSetLabels(380)

m4 = MixtureModel(
    2,
    [0.5, 0.5],
    [NormalDistribution(-1.5, 1.5), NormalDistribution(1.5, 1.5)],
)

dat = DataSet()
dat.fromArray(seq3)

# Show the model before fitting. The output is identical to the Python 2
# statement `print "vorher ------\n", m4` (no separator after the newline).
print("vorher ------\n" + str(m4))

pred = m4.cluster(
    dat,
    nr_runs=5,
    nr_init=9,
    max_iter=30,
    delta=0.1,
    labels=None,
    entropy_cutoff=None,
)
classes = m4.classify(dat)
m4.shortInitEM(dat, 5, 5, 5, 0.1)

# Pass the DataSet, as every other call on m4 in this example does; the
# original handed the raw sample (seq3) to EM instead.
m4.EM(dat, 20, 0.1)
""" items = [c.distList[0] for c in mixx.components] items.reverse() new_pi = np.array(mixx.pi.tolist() + [0.01])[::-1] new_pi = new_pi / np.sum(new_pi) #items = items + [MultiNormalDistribution(4, means, sigma)] items = items + [MultivariateTDistribution(DIMS, means, sigma, 5)] # Fix parameters of all components but the new one: #comp_fix = [1] * (len(new_pi) - 1) + [0] return MixtureModel(len(new_pi), new_pi, items) #import ipdb; ipdb.set_trace() st = MultivariateTDistribution(DIMS, xy.mean(axis=1)*1.1, np.diag(xy.var(axis=1)), 3) da = xy.T #[:500] ds = DataSet() ds.fromArray(da) m = MixtureModel(1, [1], [st]) print m m.EM(ds, 60, 0.1) print m #import ipdb; ipdb.set_trace() #m2 = mix.MixtureModel(2, [0.8, 0.2], [m.components[0].distList[0], d2], compFix=[0, 0]) for _ in xrange(6): m = mixturate(m, xy.mean(axis=1)-10., np.diag(xy.var(axis=1))) m.randMaxEM(ds, 3, 30, 0.1) print m import joblib joblib.dump(m, 'test2.mix', compress=3) pl.plotData(da[:, :2])
# Tail of the previous example: sample from e1 and set up a normal/exponential
# mixture (its EM fit is currently disabled).
seq2 = e1.sample(500)
e2 = MixtureModel(
    2,
    [0.5, 0.5],
    [NormalDistribution(2.0, 0.4), ExponentialDistribution(0.1)],
)
# e2.EM(seq2,60,5)

# ----------------------------- Example 3 -----------------------------
# Sample labelled data from a known two-component normal mixture (m3), fit a
# deliberately mis-specified mixture (m4) to it and compare the predicted
# clustering with the labels returned by the sampler.
m3 = MixtureModel(
    2,
    [0.3, 0.7],
    [NormalDistribution(0.0, 0.5), NormalDistribution(1.3, 0.5)],
)
# NOTE(review): the result is unpacked as (labels, samples) - confirm that
# sampleSetLabels returns its values in this order.
(true, seq3) = m3.sampleSetLabels(380)

m4 = MixtureModel(
    2,
    [0.5, 0.5],
    [NormalDistribution(-1.5, 1.5), NormalDistribution(1.5, 1.5)],
)

dat = DataSet()
dat.fromArray(seq3)

# Show the model before fitting. The output is identical to the Python 2
# statement `print "vorher ------\n", m4` (no separator after the newline).
print("vorher ------\n" + str(m4))

pred = m4.cluster(
    dat,
    nr_runs=5,
    nr_init=9,
    max_iter=30,
    delta=0.1,
    labels=None,
    entropy_cutoff=None,
)
classes = m4.classify(dat)
m4.shortInitEM(dat, 5, 5, 5, 0.1)

# Pass the DataSet, as every other call on m4 in this example does; the
# original handed the raw sample (seq3) to EM instead.
m4.EM(dat, 20, 0.1)

# Show the fitted model, then report the clustering against the true labels.
print("####Finish\n" + str(m4))
dat.printClustering(2, pred)
evaluate(pred, true)