def buildMixtureTreeMaxKL(data, K, iterations, maxKL, branchesPerNode = 2, multinomial = None):
    """Recursively build a mixture tree, splitting until the fit is good enough.

    Args:
        data: sequence of rows; every row is handed to
            MME.assignComponentToCounts, and the whole sequence to
            MME.computeDirichletMixture.
        K: second argument of the hyperparameter constructor; also the
            length of the uniform prior vector ``[1.0 / K] * K``.
        iterations: forwarded unchanged to MME.computeDirichletMixture.
        maxKL: a branch is not split when the worst KL value reported for
            it is strictly below this threshold.
        branchesPerNode: number of mixture components requested per node;
            also the minimum number of rows needed to split a node.
        multinomial: the parent's multinomial for this branch. The KL test
            is skipped when this is falsy (e.g. None at the root).

    Returns:
        A MultinomialMixtureTree whose ``mixtureNodes[c]`` holds the subtree
        built from the rows assigned to component ``c``, or None when the
        branch is not split.
    """
    logging.info("Dataset Size: " + str(len(data)))

    # Only non-root calls carry a multinomial to test the branch against.
    if multinomial:
        # The second element of the returned pair is not used here.
        worstKL, _unused = MME.worstFitForSingleMultinomial(data, multinomial)
        logging.info("Worst KL found in branch: " + str(worstKL))
        if worstKL < maxKL:
            return None

    # Fewer rows than requested branches: nothing left to split.
    if len(data) < branchesPerNode:
        return None

    # hyperparameters are fixed here: both prior vectors are uniform.
    componentPrior = [1.0 / branchesPerNode] * branchesPerNode
    categoryPrior = [1.0 / K] * K
    hyperP = MME.MultinomialMixtureModelHyperparams(
        branchesPerNode, K, componentPrior, categoryPrior)
    mixtureModel = MME.computeDirichletMixture(data, hyperP, iterations)

    # One bucket per component, filled with the rows assigned to it.
    rowsByComponent = [[] for _ in range(hyperP.C)]
    for counts in data:
        assigned = MME.assignComponentToCounts(counts, mixtureModel)
        rowsByComponent[assigned].append(counts)

    # NOTE(review): nothing visible here stops the recursion if every row
    # lands in one bucket and the KL test keeps failing - confirm that the
    # estimator cannot produce such a split repeatedly.
    treeModel = MultinomialMixtureTree(mixtureModel)
    for c, subset in enumerate(rowsByComponent):
        treeModel.mixtureNodes[c] = buildMixtureTreeMaxKL(
            subset, K, iterations, maxKL, branchesPerNode,
            mixtureModel.multinomials[c])
    return treeModel
def buildSimpleMixtureTree(data, K, iterations, height, branchesPerNode = 2):
    """Recursively build a mixture tree with a fixed number of levels.

    Every node fits a mixture with ``branchesPerNode`` components to its
    rows, assigns each row to one component, and builds one child per
    component from the rows assigned to it.

    Args:
        data: sequence of rows; every row is handed to
            MME.assignComponentToCounts, and the whole sequence to
            MME.computeDirichletMixture.
        K: second argument of the hyperparameter constructor; also the
            length of the uniform prior vector ``[1.0 / K] * K``.
        iterations: forwarded unchanged to MME.computeDirichletMixture.
        height: number of levels still to build. Zero or any negative value
            yields no node at all.
        branchesPerNode: number of mixture components (children) per node.

    Returns:
        A MultinomialMixtureTree whose ``mixtureNodes[c]`` holds the subtree
        for component ``c`` (None below the last level), or None when
        ``height`` is not positive.
    """
    # Use <= rather than ==: a negative or fractional height never equals
    # zero exactly and would otherwise recurse without bound.
    if height <= 0:
        return None

    # hyperparameters are fixed here: both prior vectors are uniform.
    componentPrior = [1.0 / branchesPerNode] * branchesPerNode
    categoryPrior = [1.0 / K] * K
    hyperP = MME.MultinomialMixtureModelHyperparams(
        branchesPerNode, K, componentPrior, categoryPrior)
    mixtureModel = MME.computeDirichletMixture(data, hyperP, iterations)

    # One bucket per component, filled with the rows assigned to it.
    rowsByComponent = [[] for _ in range(hyperP.C)]
    for counts in data:
        assigned = MME.assignComponentToCounts(counts, mixtureModel)
        rowsByComponent[assigned].append(counts)

    treeModel = MultinomialMixtureTree(mixtureModel)
    for c, subset in enumerate(rowsByComponent):
        treeModel.mixtureNodes[c] = buildSimpleMixtureTree(
            subset, K, iterations, height - 1, branchesPerNode)
    return treeModel
#!/usr/bin/python
# Demo script: sample rows from a stored model, fit a mixture to the samples
# and print the fitted parameters.
import multinomialMixtureEstimation as MME
import logging

logging.basicConfig(level=logging.DEBUG)

# Model the rows are sampled from.
model = MME.importFile("sampleModel.txt")

# 500 calls to sampleRow(8); the meaning of the argument is defined by
# sampleRow, which is not shown here.
dataset = [model.sampleRow(8) for _ in range(500)]

# Prior vectors have lengths 2 and 3, matching the first two arguments.
hyperP = MME.MultinomialMixtureModelHyperparams(2, 3, [1, 1], [1, 1, 1])
finalModel = MME.computeDirichletMixture(dataset, hyperP, 10)

# Single-argument parenthesised form prints the same text as the bare
# Python 2 print statement.
print("Final Model:")
print(finalModel.mixture)
print(finalModel.multinomials)
# Tail of a command-line script (Python 2 syntax: print statements). The
# leading `raise` belongs to a log-level check whose condition is not visible
# in this fragment.
#
# Visible steps, in order:
#   1. Configure logging with numeric_level and read the C and I options as
#      integers.
#   2. Read stdin line by line, split each line on tabs, convert every field
#      with int() and append the row to `dataset`; a progress line is printed
#      whenever the row counter is a multiple of 100000 (including 0).
#   3. Build hyperparameters from C and the hard-coded 168, with all-ones
#      vectors of those two lengths, and fit the mixture for `iterations`
#      iterations.  (What 168 represents is not stated here - TODO confirm.)
#   4. Write the model with outputToFile, write a TSV to stdout with
#      outputToTSV, then print the three values returned by MME.worstFit.
#
# NOTE(review): `outputModel` defaults to sys.stdin, which is an input stream;
#   sys.stdout was presumably intended. Indentation was lost in this view, so
#   it cannot be determined whether outputToFile runs only when
#   options.outputModel is set - confirm against the original file.
# NOTE(review): the file opened for options.outputModel is never closed in
#   the visible code.
# NOTE(review): progress messages are printed to stdout, the same stream that
#   outputToTSV writes to.
raise ValueError('Invalid log level: %s' % loglevel) logging.basicConfig(level=numeric_level) C = int(options.C) iterations = int(options.I) print "init dataset" dataset = [] N = 0 for row in sys.stdin: if (N % 100000 == 0): print "processed " + str(N) + " rows." splitrow = row.split("\t") dataset.append(map(int, splitrow)) N += 1 print "finished dataset" hyperP = MME.MultinomialMixtureModelHyperparams(C, 168, [1] * C, [1] * 168) finalModel = MME.computeDirichletMixture(dataset, hyperP, iterations) logging.debug("Final Model:") outputModel = sys.stdin if (options.outputModel): outputModel = open(options.outputModel, 'w') finalModel.outputToFile(outputModel) finalModel.outputToTSV(sys.stdout) (worseLogProb, worstN, worstC) = MME.worstFit(dataset, finalModel) print "worstLogProb", worseLogProb print "worst N", worstN print "worst C", worstC
# Tail of a command-line script (Python 2 syntax: print statements). The
# leading `raise` belongs to a log-level check whose condition is not visible
# in this fragment.
#
# Visible steps, in order:
#   1. Configure logging with numeric_level and read the C and I options as
#      integers.
#   2. Read stdin line by line, split each line on tabs, convert every field
#      with int() and append the row to `dataset`; a progress line is printed
#      whenever the row counter is a multiple of 100000 (including 0).
#   3. Build hyperparameters from C and the hard-coded 168, with all-ones
#      vectors of those two lengths, and fit the mixture for `iterations`
#      iterations.  (What 168 represents is not stated here - TODO confirm.)
#   4. Write the model with outputToFile, write a TSV to stdout with
#      outputToTSV, then print the three values returned by MME.worstFit.
#
# NOTE(review): `outputModel` defaults to sys.stdin, which is an input stream;
#   sys.stdout was presumably intended. Indentation was lost in this view, so
#   it cannot be determined whether outputToFile runs only when
#   options.outputModel is set - confirm against the original file.
# NOTE(review): the file opened for options.outputModel is never closed in
#   the visible code.
# NOTE(review): progress messages are printed to stdout, the same stream that
#   outputToTSV writes to.
raise ValueError('Invalid log level: %s' % loglevel) logging.basicConfig(level=numeric_level) C = int(options.C) iterations = int(options.I) print "init dataset" dataset = [] N = 0 for row in sys.stdin: if (N % 100000 == 0): print "processed " + str(N) + " rows." splitrow = row.split("\t") dataset.append(map(int, splitrow)) N += 1 print "finished dataset" hyperP = MME.MultinomialMixtureModelHyperparams(C, 168, [1]*C, [1]*168) finalModel = MME.computeDirichletMixture(dataset, hyperP, iterations) logging.debug("Final Model:") outputModel = sys.stdin if (options.outputModel): outputModel = open(options.outputModel, 'w') finalModel.outputToFile(outputModel) finalModel.outputToTSV(sys.stdout) (worseLogProb, worstN, worstC) = MME.worstFit(dataset, finalModel) print "worstLogProb", worseLogProb print "worst N", worstN print "worst C", worstC