Esempio n. 1
0
def extractBudgetCountsBottomPercentile():
    """Write a histogram of the lowest quarter of project budgets to a TSV file.

    Every ``Project`` budget is converted with
    ``convertAmountStringToInteger``, the values are sorted ascending and
    only the first ``total // 4`` of them are kept. Those values are counted
    into 100 buckets and one line per bucket is written to
    ``espaHistoSecond25Percent.tsv`` in the current working directory.

    Each output line holds three tab-separated fields, all formatted with
    ``%d`` (so fractional parts are truncated): the bucket's lower bound,
    the bucket width and the number of budgets in the bucket.

    If there are fewer than four budgets the bottom quarter is empty; a
    message is printed and no file is written.

    Returns:
        None. Progress information is printed to standard output.
    """
    budgetValues = sorted(
        convertAmountStringToInteger(project.budget)
        for project in Project.objects.all()
    )
    totalBudgetCount = len(budgetValues)
    # Integer division gives the size of the bottom quarter directly,
    # without a float round trip.
    desiredBudgetCount = totalBudgetCount // 4
    if desiredBudgetCount == 0:
        # Guard clause: with an empty slice the percentage below would
        # divide by zero (no projects) or min()/max() would fail.
        print("need at least 4 project budgets to take the bottom quarter, found %d"
              % totalBudgetCount)
        return

    budgetValues = budgetValues[:desiredBudgetCount]
    print("will work with bottom %d values out of %d which is the %f percent" %
          (desiredBudgetCount, totalBudgetCount, (len(budgetValues) * 100) / totalBudgetCount))

    # Now budgetValues contains the selected project budgets.
    # Separate them into buckets.
    numberOfBuckets = 100
    lowBudgetValue = min(budgetValues)
    highBudgetValue = max(budgetValues)

    countPerBucket = [0] * numberOfBuckets
    for budgetValue in budgetValues:
        # The upper bound passed is highBudgetValue + 1, presumably so the
        # maximum value lands in the last bucket -- confirm against
        # valueToBucketIndex.
        bucketIndex = valueToBucketIndex(numberOfBuckets, lowBudgetValue, highBudgetValue + 1, budgetValue)
        countPerBucket[bucketIndex] += 1

    # NOTE(review): the file name says "Second25Percent" although the slice
    # taken above is the lowest quarter -- confirm which one is intended.
    # The with-block closes the file even if a write fails.
    with open("espaHistoSecond25Percent.tsv", "w") as theTsv:
        # NOTE(review): dx is derived from high - low, while bucketing above
        # used high + 1 as the upper bound, so the printed edges may differ
        # slightly from the edges used for counting.
        dx = (highBudgetValue - lowBudgetValue) / numberOfBuckets
        print("high value is %f, low value is %f, interval is %f" % (highBudgetValue, lowBudgetValue, dx))
        for i, bucketCount in enumerate(countPerBucket):
            print("%d\t%d\t%d" % ((lowBudgetValue + i * dx), dx, bucketCount), file=theTsv)
Esempio n. 2
0
def extractBudgetCountsAll():
    """Write a histogram of all project budgets to a TSV file.

    Every ``Project`` budget is converted with
    ``convertAmountStringToInteger``, the values are counted into 100
    buckets and one line per bucket is written to ``espaHisto.tsv`` in the
    current working directory.

    Each output line holds three tab-separated fields: the bucket's lower
    bound and the bucket width, both divided by 1,000,000, followed by the
    number of budgets in the bucket. All three are formatted with ``%d``,
    so fractional parts are truncated (a width below one million prints
    as 0).

    If there are no budgets a message is printed and no file is written.

    Returns:
        None. Progress information is printed to standard output.
    """
    budgetValues = [
        convertAmountStringToInteger(project.budget)
        for project in Project.objects.all()
    ]
    if not budgetValues:
        # Guard clause: min()/max() raise ValueError on an empty list.
        print("no project budgets found; nothing to write")
        return

    # Now budgetValues contains all project budgets.
    # Separate them into buckets.
    numberOfBuckets = 100
    lowBudgetValue = min(budgetValues)
    highBudgetValue = max(budgetValues)

    countPerBucket = [0] * numberOfBuckets
    for budgetValue in budgetValues:
        # The upper bound passed is highBudgetValue + 1, presumably so the
        # maximum value lands in the last bucket -- confirm against
        # valueToBucketIndex.
        bucketIndex = valueToBucketIndex(numberOfBuckets, lowBudgetValue, highBudgetValue + 1, budgetValue)
        countPerBucket[bucketIndex] += 1

    # The with-block closes the file even if a write fails.
    with open("espaHisto.tsv", "w") as theTsv:
        # NOTE(review): dx is derived from high - low, while bucketing above
        # used high + 1 as the upper bound, so the printed edges may differ
        # slightly from the edges used for counting.
        dx = (highBudgetValue - lowBudgetValue) / numberOfBuckets
        print("high value is %f, low value is %f, interval is %f" % (highBudgetValue, lowBudgetValue, dx))
        for i, bucketCount in enumerate(countPerBucket):
            print("%d\t%d\t%d" % ((lowBudgetValue + i * dx) / 1000000, dx / 1000000, bucketCount), file=theTsv)