コード例 #1
0
def buildMixtureTreeMaxKL(data, K, iterations, maxKL, branchesPerNode = 2, multinomial = None):
    """Recursively build a mixture tree, splitting a branch only while it fits badly.

    A node is built by fitting a mixture with ``branchesPerNode`` components
    to ``data`` and routing every entry of ``data`` to the component picked
    by ``MME.assignComponentToCounts``.  Each component's subset is then
    expanded recursively, together with that component's multinomial.

    Args:
        data: Sized iterable of count entries (presumably one count vector
            of length ``K`` per observation -- TODO confirm against MME).
        K: Passed to the hyperparameter constructor; also sizes the uniform
            ``[1.0 / K] * K`` vector handed to it.
        iterations: Forwarded unchanged to ``MME.computeDirichletMixture``.
        maxKL: Stop threshold.  When ``multinomial`` is given and the worst
            value reported by ``MME.worstFitForSingleMultinomial`` is below
            this, the branch is not split.
        branchesPerNode: Number of mixture components fitted per node.
        multinomial: Multinomial of the parent component that produced this
            branch, or ``None`` for the root call (no fit check is done).

    Returns:
        A ``MultinomialMixtureTree`` whose ``mixtureNodes[c]`` holds the
        subtree for component ``c`` (``None`` where that branch was not
        split), or ``None`` when this branch itself is not split.
    """
    logging.info("Dataset Size: %s", len(data))

    # Compare against None explicitly: truth-testing the multinomial itself
    # would raise for a multi-element numpy array and would silently skip
    # the fit check for an empty sequence.
    if multinomial is not None:
        worstKL, _ = MME.worstFitForSingleMultinomial(data, multinomial)
        logging.info("Worst KL found in branch: %s", worstKL)
        if worstKL < maxKL:
            return None

    # Fewer entries than components: do not attempt to fit a mixture.
    if len(data) < branchesPerNode:
        return None

    # hyperparameters are fixed here: uniform vectors over the components
    # and over the K categories.
    hyperP = MME.MultinomialMixtureModelHyperparams(
        branchesPerNode,
        K,
        [1.0 / branchesPerNode] * branchesPerNode,
        [1.0 / K] * K,
    )
    mixtureModel = MME.computeDirichletMixture(data, hyperP, iterations)

    # Partition the data by assigned component; one bucket per component.
    smallerDatasets = [[] for _ in range(hyperP.C)]
    for counts in data:
        component = MME.assignComponentToCounts(counts, mixtureModel)
        smallerDatasets[component].append(counts)

    treeModel = MultinomialMixtureTree(mixtureModel)
    for c, smallerDataset in enumerate(smallerDatasets):
        treeModel.mixtureNodes[c] = buildMixtureTreeMaxKL(
            smallerDataset, K, iterations, maxKL, branchesPerNode,
            mixtureModel.multinomials[c])

    return treeModel
コード例 #2
0
def buildSimpleMixtureTree(data, K, iterations, height, branchesPerNode = 2):
    """Recursively build a mixture tree with a fixed number of levels.

    Each node fits a mixture with ``branchesPerNode`` components to ``data``,
    routes every entry of ``data`` to the component picked by
    ``MME.assignComponentToCounts`` and builds one child per component from
    that component's subset, with ``height`` reduced by one.

    Args:
        data: Iterable of count entries (presumably one count vector of
            length ``K`` per observation -- TODO confirm against MME).
        K: Passed to the hyperparameter constructor; also sizes the uniform
            ``[1.0 / K] * K`` vector handed to it.
        iterations: Forwarded unchanged to ``MME.computeDirichletMixture``.
        height: Number of node levels still to build.  Zero or a negative
            value yields no node.
        branchesPerNode: Number of mixture components fitted per node.

    Returns:
        A ``MultinomialMixtureTree`` whose ``mixtureNodes[c]`` holds the
        subtree for component ``c`` (``None`` at the last level), or
        ``None`` when ``height`` is not positive.
    """
    # `<=` rather than `==`: a negative height would otherwise never reach
    # the base case and the recursion would not terminate.
    if height <= 0:
        return None

    # hyperparameters are fixed here: uniform vectors over the components
    # and over the K categories.
    hyperP = MME.MultinomialMixtureModelHyperparams(
        branchesPerNode,
        K,
        [1.0 / branchesPerNode] * branchesPerNode,
        [1.0 / K] * K,
    )
    mixtureModel = MME.computeDirichletMixture(data, hyperP, iterations)

    # Partition the data by assigned component; one bucket per component.
    smallerDatasets = [[] for _ in range(hyperP.C)]
    for counts in data:
        component = MME.assignComponentToCounts(counts, mixtureModel)
        smallerDatasets[component].append(counts)

    treeModel = MultinomialMixtureTree(mixtureModel)
    for c, smallerDataset in enumerate(smallerDatasets):
        treeModel.mixtureNodes[c] = buildSimpleMixtureTree(
            smallerDataset, K, iterations, height - 1, branchesPerNode)

    return treeModel
コード例 #3
0
    dataset.append(map(int, splitrow))

#for n in range(0, len(dataset)):
#  counts = dataset[n]
#  print str(n) + "\t" + str(MME.assignComponentToCounts(counts, model))

# print file for google docs
#print "component\t",
#for i in range(0, C): print str(i) + "\t",
#print ""
#print "prior\t" + "\t".join(map(str, finalModel.mixture))
#
#for k in range(0, 168):
#  print str(k) + "\t",
#  for i in range(0, C):
#    print str(finalModel.multinomials[i][k]) + "\t",
#  print ""

# For every row of `dataset`, record [row index, assigned component, value
# returned by MME.klTest for that row/component].
rowInfoList = []
for N, row in enumerate(dataset):
    c = MME.assignComponentToCounts(row, model)
    klDiv = MME.klTest(row, model, c)
    rowInfoList.append([N, c, klDiv])
# Leave N holding the number of rows processed, as a running counter would.
N = len(rowInfoList)

# Order a copy of the records by the klTest value, largest first.
sortedRows = list(rowInfoList)
sortedRows.sort(key=lambda info: -info[2])

# Emit a tab-separated table: header line, then one line per record.
print("row\tmodel\tklDivergence")
for row in sortedRows:
    print("\t".join(str(field) for field in row))
コード例 #4
0
# Read tab-separated integer rows from standard input; each input line
# becomes one list of ints appended to `dataset`.
for row in sys.stdin:
  dataset.append([int(field) for field in row.split("\t")])

#for n in range(0, len(dataset)):
#  counts = dataset[n]
#  print str(n) + "\t" + str(MME.assignComponentToCounts(counts, model))

# print file for google docs
#print "component\t",
#for i in range(0, C): print str(i) + "\t",
#print ""
#print "prior\t" + "\t".join(map(str, finalModel.mixture))
#
#for k in range(0, 168):
#  print str(k) + "\t",
#  for i in range(0, C):
#    print str(finalModel.multinomials[i][k]) + "\t",
#  print ""

# For every row of `dataset`, record [row index, assigned component, value
# returned by MME.klTest against that component's multinomial, sum of the row].
rowInfoList = []
for N, row in enumerate(dataset):
  c = MME.assignComponentToCounts(row, model)
  klDiv = MME.klTest(row, model.multinomials[c])
  rowInfoList.append([N, c, klDiv, sum(row)])
# Leave N holding the number of rows processed, as a running counter would.
N = len(rowInfoList)

# Emit a tab-separated table in input order: header line, then one line per record.
print("row\tmodel\tklDivergence\tNumber of Data Points")
for row in rowInfoList:
  print("\t".join(str(field) for field in row))