def extractBudgetCountsBottomPercentile():
    """Write a 100-bucket histogram of the lowest quarter of project budgets.

    Reads the budget of every ``Project``, converts each one with
    ``convertAmountStringToInteger``, keeps the smallest
    ``len(budgets) // 4`` values and counts how many of them fall into each
    of 100 buckets.  One tab-separated row per bucket (bucket start, bucket
    width, count) is written to ``espaHistoSecond25Percent.tsv``, a path
    relative to the current working directory.  Progress information is
    printed to stdout.

    If there are fewer than four budgets the bottom quarter is empty; a
    message is printed and no file is written.
    """
    budgetValues = sorted(
        convertAmountStringToInteger(project.budget)
        for project in Project.objects.all()
    )
    totalBudgetCount = len(budgetValues)
    # Integer division: same result as floor(total / 4) without a float
    # round trip.
    desiredBudgetCount = totalBudgetCount // 4
    if desiredBudgetCount == 0:
        # An empty slice would make min()/max() raise ValueError (and a
        # zero total would divide by zero in the percentage below).
        print("only %d budget values available, bottom quarter is empty; "
              "nothing written" % totalBudgetCount)
        return

    budgetValues = budgetValues[:desiredBudgetCount]
    print("will work with bottom %d values out of %d which is the %f percent"
          % (desiredBudgetCount, totalBudgetCount,
             (len(budgetValues) * 100) / totalBudgetCount))

    # budgetValues now holds the selected budgets in ascending order, so the
    # extremes are simply the first and last elements.
    numberOfBuckets = 100
    lowBudgetValue = budgetValues[0]
    highBudgetValue = budgetValues[-1]

    countPerBucket = [0] * numberOfBuckets
    for budgetValue in budgetValues:
        # The upper bound is passed as high + 1, presumably so that the
        # maximum value still maps to the last bucket -- confirm against
        # valueToBucketIndex.
        bucketIndex = valueToBucketIndex(numberOfBuckets, lowBudgetValue,
                                         highBudgetValue + 1, budgetValue)
        countPerBucket[bucketIndex] += 1

    # NOTE(review): the reported bucket width uses (high - low) while the
    # bucket index above was computed with high + 1 as the upper bound.
    dx = (highBudgetValue - lowBudgetValue) / numberOfBuckets
    print("high value is %f, low value is %f, interval is %f"
          % (highBudgetValue, lowBudgetValue, dx))

    # The context manager closes the file even if a write fails part-way.
    with open("espaHistoSecond25Percent.tsv", "w") as theTsv:
        for i, count in enumerate(countPerBucket):
            # %d truncates the (possibly fractional) start and width.
            print("%d\t%d\t%d" % ((lowBudgetValue + i * dx), dx, count),
                  file=theTsv)
def extractBudgetCountsAll():
    """Write a 100-bucket histogram of all project budgets.

    Reads the budget of every ``Project``, converts each one with
    ``convertAmountStringToInteger`` and counts how many values fall into
    each of 100 buckets.  One tab-separated row per bucket (bucket start,
    bucket width, count) is written to ``espaHisto.tsv``, a path relative to
    the current working directory; start and width are divided by 1000000
    before being written (presumably to express them in millions -- confirm
    the unit).  Progress information is printed to stdout.

    If there are no projects a message is printed and no file is written.
    """
    budgetValues = [
        convertAmountStringToInteger(project.budget)
        for project in Project.objects.all()
    ]
    if not budgetValues:
        # min()/max() raise ValueError on an empty sequence.
        print("no budget values available; nothing written")
        return

    numberOfBuckets = 100
    lowBudgetValue = min(budgetValues)
    highBudgetValue = max(budgetValues)

    countPerBucket = [0] * numberOfBuckets
    for budgetValue in budgetValues:
        # The upper bound is passed as high + 1, presumably so that the
        # maximum value still maps to the last bucket -- confirm against
        # valueToBucketIndex.
        bucketIndex = valueToBucketIndex(numberOfBuckets, lowBudgetValue,
                                         highBudgetValue + 1, budgetValue)
        countPerBucket[bucketIndex] += 1

    # NOTE(review): the reported bucket width uses (high - low) while the
    # bucket index above was computed with high + 1 as the upper bound.
    dx = (highBudgetValue - lowBudgetValue) / numberOfBuckets
    print("high value is %f, low value is %f, interval is %f"
          % (highBudgetValue, lowBudgetValue, dx))

    # The context manager closes the file even if a write fails part-way.
    with open("espaHisto.tsv", "w") as theTsv:
        for i, count in enumerate(countPerBucket):
            # %d truncates the scaled start and width to whole numbers.
            print("%d\t%d\t%d" % ((lowBudgetValue + i * dx) / 1000000,
                                  dx / 1000000, count),
                  file=theTsv)