# Accumulate, per category k, the mean of log(data[k]) over the sampled rows
# of `reader`, then hand those averages to DE.findDirichletPriors.
# Relies on K, reader, options, priors, startTime and DE being defined earlier.
ss = [0]*K

i = 0        # rows read so far (sampled or not); used for progress/error output
sampled = 0  # rows that actually contributed to ss
sampleRate = float(options.sampleRate)  # loop-invariant, so convert it once
for row in reader:
    i += 1

    # Keep each row with probability sampleRate.
    if random.random() < sampleRate:
        data = map(float, row)
        if len(data) != K:
            print "Error: there are " + str(K) + " categories, but line has " + str(len(data)) + " counts."
            print "line " + str(i) + ": " + str(data)

        for k in range(0, K):
            ss[k] += math.log(data[k])
        sampled += 1

    if (i % 1000000) == 0: print "Loading Data", i

# Average over the rows that were accumulated. Dividing by the total row
# count would scale the averages by the sampling rate whenever sampleRate < 1.
if sampled == 0:
    raise ValueError("no rows were sampled; cannot average the log values")
for k in range(0, K):
    ss[k] /= sampled

dataLoadTime = time.time()
logging.debug("all data loaded into memory")
# logging applies "msg % args" lazily, so every extra argument needs a
# placeholder in the message; without one the record fails to format.
logging.debug("time to load memory: %s", dataLoadTime - startTime)

priors = DE.findDirichletPriors(ss, priors)
print "Final priors: ", priors
logging.debug("Final average loss: %s", DE.getTotalLoss(priors, ss))
logging.debug("best loss: %s", DE.getTotalLoss([1,2], ss))

totalTime = time.time() - dataLoadTime
logging.debug("Time to calculate: " + str(totalTime))
	
	
# Accumulate, per category k, the mean of log(data[k]) over the sampled rows
# of `reader` into the existing list `ss`, then fit priors with
# DE.findDirichletPriors. `ss` must already hold K numeric entries; K, reader,
# options, priors, startTime and DE are expected to be defined earlier.
i = 0        # rows read so far (sampled or not); used for progress/error output
sampled = 0  # rows that actually contributed to ss
sampleRate = float(options.sampleRate)  # loop-invariant, so convert it once
for row in reader:
    i += 1

    # Keep each row with probability sampleRate.
    if random.random() < sampleRate:
        data = map(float, row)
        if len(data) != K:
            print "Error: there are " + str(
                K) + " categories, but line has " + str(len(data)) + " counts."
            print "line " + str(i) + ": " + str(data)

        for k in range(0, K):
            ss[k] += math.log(data[k])
        sampled += 1

    if (i % 1000000) == 0: print "Loading Data", i

# Average over the rows that were accumulated. Dividing by the total row
# count would scale the averages by the sampling rate whenever sampleRate < 1.
if sampled == 0:
    raise ValueError("no rows were sampled; cannot average the log values")
for k in range(0, K):
    ss[k] /= sampled

dataLoadTime = time.time()
logging.debug("all data loaded into memory")
# logging applies "msg % args" lazily, so every extra argument needs a
# placeholder in the message; without one the record fails to format.
logging.debug("time to load memory: %s", dataLoadTime - startTime)

priors = DE.findDirichletPriors(ss, priors)
print "Final priors: ", priors
logging.debug("Final average loss: %s", DE.getTotalLoss(priors, ss))
logging.debug("best loss: %s", DE.getTotalLoss([1, 2], ss))

totalTime = time.time() - dataLoadTime
logging.debug("Time to calculate: " + str(totalTime))
                # NOTE(review): these lines sit inside nested blocks whose
                # headers are outside this excerpt -- presumably a per-trial
                # loop inside a loop over M; confirm against the full file.
                # Draw a synthetic dataset from M, N and the reference alphas.
                uMatrix = Sample.generateRandomDataset(M, N, alphas)
                # M copies of N -- presumably the per-row totals expected by
                # DME.findDirichletPriors; TODO confirm.
                vVector = [N] * M
                # Starting guess: K entries, each equal to 1/K.
                init = [1.0 / K] * K
                MLEPriors = DME.findDirichletPriors(uMatrix, vVector, init,
                                                    False)
                # Record getError's score of the fitted priors against alphas.
                errors.append(getError(alphas, MLEPriors))

            # Ascending order, so a fixed index below picks an order statistic.
            errors.sort()

            # One tab-separated row: N, M, then the entries at sorted positions
            # 300, 500, 700 and 900 (requires at least 901 recorded errors).
            print "\t".join(
                map(str,
                    [N, M, errors[300], errors[500], errors[700], errors[900]
                     ]))

        # Test the M = infinity case
        errors = []

        for i in range(0, 1000):
            ss = Sample.generateRandomDirichletsSS(N, alphas)
            init = [1.0 / K] * K
            MLEPriors = DE.findDirichletPriors(ss, init, False)
            error = getError(alphas, MLEPriors)
            errors.append(error)

        errors.sort()

        print "\t".join(
            map(str,
                [N, "Inf", errors[300], errors[500], errors[700], errors[900]
                 ]))
Exemple #4
0
    print
    K = len(alphas)
  
    for M in [5]:
      errors = []

      for i in range(0, 1000):
        uMatrix = Sample.generateRandomDataset(M, N, alphas)
        vVector = [N]*M
        init = [1.0 / K]*K
        MLEPriors = DME.findDirichletPriors(uMatrix, vVector, init, False)
        errors.append(getError(alphas, MLEPriors))

      errors.sort()

      print "\t".join(map(str, [N, M, errors[300], errors[500], errors[700], errors[900]]))

    # Test the M = infinity case
    errors = []

    for i in range(0, 1000):
      ss = Sample.generateRandomDirichletsSS(N, alphas)
      init = [1.0 / K]*K
      MLEPriors = DE.findDirichletPriors(ss, init, False)
      error = getError(alphas, MLEPriors)
      errors.append(error)

    errors.sort()

    print "\t".join(map(str, [N, "Inf", errors[300], errors[500], errors[700], errors[900]]))