def getActualBracketVector(year):
    """Return the actual outcome vector for ``year`` as a list of ints.

    Reads 'allBracketsTTT.json' from the current working directory, builds
    each stored bracket with buildBracketFromJson until one whose ``year``
    attribute equals ``year`` is found, and converts every element of that
    bracket's ``fullVector`` to an int.

    Args:
        year: value compared (with ==) against each bracket's ``year``.

    Returns:
        A list with one int per element of the matching ``fullVector``.

    Raises:
        ValueError: if the file contains no bracket for ``year``.
    """
    inputFilename = 'allBracketsTTT.json'
    with open(inputFilename, 'r') as inputFile:
        dataJson = inputFile.read().replace('\n', '')
    dataPyDict = json.loads(dataJson)

    for bracketDict in dataPyDict['brackets']:
        bracket = buildBracketFromJson(bracketDict['bracket'])
        if bracket.year == year:
            return [int(outcome) for outcome in bracket.fullVector]

    # Falling through used to return whichever bracket happened to be last in
    # the file, i.e. the results of a different year. Fail loudly instead.
    raise ValueError(
        'No bracket for year {0} found in {1}'.format(year, inputFilename))
# Script: for every stored bracket, print its year and the Hamming distance
# between the "Pick Favorite" bracket and the actual results, restricted to
# the first 60 bits of the bracket vector.
with open('allBracketsTTT.json', 'r') as inputFile:
    jsonData = inputFile.read().replace('\n', '')

dataPyDict = json.loads(jsonData)
bracketList = dataPyDict['brackets']

# First 60 bits of the Pick Favorite bracket string, 15 bits per group:
#   111111111000101
#   111111111000101
#   111111111000101
#   111111111000101
# For comparison, the 2015 groups:
#   111111111101111
#   111110011010101
#   111101110001000
#   111100111110101
pfBracketString = '111111111000101111111111000101111111111000101111111111000101'
pfVector = [int(bit) for bit in pfBracketString]

for bracketDict in bracketList:
    bracket = buildBracketFromJson(bracketDict['bracket'])
    actualFirst60 = bracket.fullVector[0:60]
    actualVector = [int(bit) for bit in actualFirst60]

    # Count the positions at which the two vectors disagree.
    hammingDist = 0
    for pfBit, actualBit in zip(pfVector, actualVector):
        if pfBit != actualBit:
            hammingDist += 1

    print('{0}: {1}'.format(bracket.year, hammingDist))
def performFixedAlphaExperiments(numTrials, year, isFixedFirstRoundAlphas, isFixedK, rangeK, batchNumber):
    """Generate ``numTrials`` random brackets for ``year``, score them, save as JSON.

    The actual bracket for ``year`` is loaded from 'allBracketsTTT.json'.
    For each trial, one alpha value per round (rounds 1-6) is sampled around
    that year's average alpha, a bracket is produced with generateBracket,
    and it is scored against the actual bracket with scoreBracket and
    calcCorrectPicks. All trials are written to
    'Experiments/OneMillionTrials/Batch<batchNumber>/generatedBrackets_...json'.

    Args:
        numTrials: number of brackets to generate.
        year: tournament year; selects the actual bracket and the alpha averages.
        isFixedFirstRoundAlphas: forwarded unchanged to generateBracket.
        isFixedK: if true, every round uses a swing of exactly ``rangeK``;
            otherwise each swing is drawn as ``random.random() * rangeK``.
        rangeK: swing size (or its upper bound when ``isFixedK`` is false).
        batchNumber: integer used in the output directory name.

    Raises:
        ValueError: if 'allBracketsTTT.json' has no bracket for ``year``.
    """
    inputFilename = 'allBracketsTTT.json'
    with open(inputFilename, 'r') as inputFile:
        dataJson = inputFile.read().replace('\n', '')
    dataPyDict = json.loads(dataJson)

    bracket = None
    for bracketDict in dataPyDict['brackets']:
        candidate = buildBracketFromJson(bracketDict['bracket'])
        if candidate.year == year:
            bracket = candidate
            break
    if bracket is None:
        # Previously the last bracket in the file was used silently, so every
        # trial would have been scored against another year's results.
        raise ValueError(
            'No bracket for year {0} found in {1}'.format(year, inputFilename))

    correctVector = [int(outcome) for outcome in bracket.fullVector]

    # Weighted average alpha values per round per year, rounded to two
    # decimal places. The 0-th index is unused for easy indexing.
    alphaAvgByYear = {
        2013: [0, 1.01, 1.10, 0.91, 0.36, 1.00, 1.54],
        2014: [0, 1.00, 1.03, 0.90, 0.23, 1.03, 1.54],
        2015: [0, 1.04, 1.03, 0.86, 0.19, 0.93, 1.55],
        2016: [0, 1.12, 1.02, 0.88, 0.22, 0.97, 1.57],
        2017: [0, 1.05, 1.01, 0.90, 0.14, 1.00, 1.35],
    }
    # Fallback: old guesses for alpha values; not expected to be reached.
    alphaAvg = alphaAvgByYear.get(year, [0, 1.2, 1.0, 1.0, 0.3, 1.3, 1.8])

    brackets = []
    for trial in range(numTrials):
        # Sample to get new alpha values for each trial.
        if isFixedK:
            alphaSwing = [rangeK] * 7
        else:
            alphaSwing = [random.random() * rangeK for i in range(7)]

        # Each round's alpha is its average shifted by a uniform offset in
        # [-swing, swing). Index 0 is a placeholder.
        alphaVals = [0]
        for roundNum in range(1, 7):
            offset = (2 * random.random() - 1) * alphaSwing[roundNum]
            alphaVals.append(alphaAvg[roundNum] + offset)

        # We pass in the K that will be used for R1, if needed.
        newBracketVector = generateBracket(
            alphaVals, year, isFixedFirstRoundAlphas, alphaSwing[1])
        newBracketScore = scoreBracket(newBracketVector, correctVector)
        numCorrectPicks = calcCorrectPicks(newBracketScore)

        newBracketString = ''.join(str(outcome) for outcome in newBracketVector)
        brackets.append({
            'bracketVector': newBracketString,
            'score': newBracketScore,
            'correctPicks': numCorrectPicks,
            'alphaVals': ['{0:5.2f}'.format(alpha) for alpha in alphaVals[1:7]],
        })

    bracketListDict = {
        'year': year,
        'actualBracket': bracket.fullVector,
        'brackets': brackets,
    }

    outputFilename = 'Experiments/OneMillionTrials/Batch{5:02d}/generatedBrackets_{0}_{1}_{2}_{3}_{4:.2f}.json'.format(
        numTrials, year, isFixedFirstRoundAlphas, isFixedK, rangeK, batchNumber)
    with open(outputFilename, 'w') as outputFile:
        outputFile.write(json.dumps(bracketListDict))
# Re-encode every bracket read from inputFilename into bracketFormat and
# write the result to outputFilename as {"brackets": [ ... ]}.
# The output file is managed by a `with` block so it is closed (and its
# buffer flushed) even if reading, converting or writing a bracket raises.
with open(outputFilename, 'w') as outputFile:
    outputFile.write('{')
    outputFile.write('"brackets": [')

    with open(inputFilename, 'r') as inputFile:
        jsonData = inputFile.read().replace('\n', '')
    jsonToPython = json.loads(jsonData)
    bracketList = jsonToPython['brackets']
    numBrackets = len(bracketList)

    for i in range(0, numBrackets):
        bracketDict = bracketList[i]['bracket']
        bracket = buildBracketFromJson(bracketDict)

        # Convert each region's vector, then rebuild the full vector from the
        # first four regions followed by the final-four part.
        for region in bracket.regions:
            region.vector = convertRegionVector(region.vector, bracketFormat)
        bracket.fullVector = ''.join(
            [bracket.regions[j].vector for j in range(0, 4)]) + bracket.finalFour

        writeBracket(outputFile, bracket)
        # Comma-separate entries; no trailing comma after the last one.
        if i < numBrackets - 1:
            outputFile.write(',')

    outputFile.write(']')
    outputFile.write('}')
def testGofToUniform(pos1, pos2, pos3, isPooled, outputFile):
    """Chi-square goodness-of-fit of bit patterns against a uniform distribution.

    For each of the eight bracket formats ('TTT' ... 'FFF'), reads
    'Brackets/<format>/allBrackets<format>.json', counts how often each
    combination of the bits at ``pos1``, ``pos2`` (and ``pos3``) occurs, and
    computes the chi-square statistic against equal expected frequencies.
    The patterns, their counts and the statistic are printed, and also
    written to ``outputFile`` when one is given.

    Args:
        pos1, pos2: bit positions within a region's part of ``fullVector``.
        pos3: third bit position, or -1 to test only the pair (pos1, pos2).
        isPooled: if true, the positions are read in all 4 regions (offsets
            0, 15, 30, 45); otherwise only at offset 0.
        outputFile: writable file-like object, or None to only print.

    Raises:
        ValueError: if a bracket file contains no brackets.
    """
    import json
    from bracketClassDefinitions import buildBracketFromJson

    formats = ['TTT', 'TTF', 'TFT', 'TFF', 'FTT', 'FTF', 'FFT', 'FFF']
    if pos3 == -1:
        patterns = ['00', '01', '10', '11']
    else:
        patterns = ['000', '001', '010', '011', '100', '101', '110', '111']
    nPatterns = len(patterns)
    numRegions = 4 if isPooled else 1

    for formatType in formats:
        patternFreqs = [0] * nPatterns

        filename = 'Brackets/{0}/allBrackets{0}.json'.format(formatType)
        with open(filename, 'r') as inputFile:
            jsonData = inputFile.read().replace('\n', '')
        bracketList = json.loads(jsonData)['brackets']

        for bracketEntry in bracketList:
            bracket = buildBracketFromJson(bracketEntry['bracket'])
            for region in range(numRegions):
                # The code assumes each region occupies 15 consecutive bits.
                offset = region * 15
                pos1Result = int(bracket.fullVector[pos1 + offset])
                pos2Result = int(bracket.fullVector[pos2 + offset])
                if pos3 == -1:
                    pos3Result = 0
                else:
                    pos3Result = int(bracket.fullVector[pos3 + offset])

                # Position of the observed pattern in `patterns`. Floor
                # division keeps the index an int on every Python version.
                index = (pos1Result * (nPatterns // 2)
                         + pos2Result * (nPatterns // 4)
                         + pos3Result)
                patternFreqs[index] += 1

        # Use the real number of observations rather than assuming that every
        # file holds exactly 33 brackets.
        numObservations = len(bracketList) * numRegions
        if numObservations == 0:
            raise ValueError('No brackets found in {0}'.format(filename))
        expFreq = numObservations * 1.0 / nPatterns

        chiSquare = 0
        for obsFreq in patternFreqs:
            chiSquare += (obsFreq - expFreq) ** 2 / expFreq
        chiSquareLine = 'chi-square value = {0}'.format(chiSquare)

        print(formatType)
        print(patterns)
        print(patternFreqs)
        print(chiSquareLine)
        print('')

        if outputFile is not None:
            if pos3 == -1:
                header = '{0}: GOF Test vs. Uniform For Positions {1} and {2}:\n'.format(
                    formatType, pos1, pos2)
            else:
                header = '{3}: GOF Test vs. Uniform For Positions {0}, {1}, and {2}:\n'.format(
                    pos1, pos2, pos3, formatType)
            outputFile.write(header)
            # write() needs a string; passing the lists themselves raised
            # TypeError. str() gives the same text that print shows above.
            outputFile.write(str(patterns))
            outputFile.write('\n')
            outputFile.write(str(patternFreqs))
            outputFile.write('\n{0}\n\n'.format(chiSquareLine))
def testPairwiseIndependence(pos1, pos2, formatType, outputFile, isPooled=False):
    """Chi-square test of independence between two bracket bits.

    Reads 'Brackets/<formatType>/allBrackets<formatType>.json', builds the
    2x2 contingency table of the bits at ``pos1`` (rows) and ``pos2``
    (columns), and computes the chi-square statistic. When the statistic
    reaches 3.841, a line 'pos1 pos2 --- statistic' is written to
    ``outputFile``.

    Args:
        pos1, pos2: bit positions within a region's part of ``fullVector``.
        formatType: bracket format name used to locate the input file.
        outputFile: writable file-like object for significant results;
            None skips the write.
        isPooled: if true, the positions are read in all 4 regions (offsets
            0, 15, 30, 45); otherwise only at offset 0.

    Returns:
        1 if the statistic is >= 3.841, otherwise 0. Also 0 when the test
        cannot be performed (no observations, or an expected frequency of 0).
    """
    import json
    from bracketClassDefinitions import buildBracketFromJson

    DEBUG = False

    # Counts for the patterns '00', '01', '10', '11', in that order.
    patternFreqs = [0, 0, 0, 0]
    nRegions = 4 if isPooled else 1

    filename = 'Brackets/{0}/allBrackets{0}.json'.format(formatType)
    with open(filename, 'r') as inputFile:
        jsonData = inputFile.read().replace('\n', '')
    bracketList = json.loads(jsonData)['brackets']
    numBrackets = len(bracketList)

    for bracketEntry in bracketList:
        bracket = buildBracketFromJson(bracketEntry['bracket'])
        for region in range(nRegions):
            # The code assumes each region occupies 15 consecutive bits.
            offset = region * 15
            pos1Result = int(bracket.fullVector[pos1 + offset])
            pos2Result = int(bracket.fullVector[pos2 + offset])
            patternFreqs[pos1Result * 2 + pos2Result] += 1

    rowSums = [
        patternFreqs[0] + patternFreqs[1],
        patternFreqs[2] + patternFreqs[3],
    ]
    colSums = [
        patternFreqs[0] + patternFreqs[2],
        patternFreqs[1] + patternFreqs[3],
    ]

    # The chi-square critical value for 1 degree of freedom and alpha = 0.05
    # is 3.841. (Source: http://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm)
    # We print the result only if it is deemed significant.
    isSignificant = 0
    chiSquare = 0
    nObservations = numBrackets * nRegions
    if nObservations == 0:
        # Nothing to test; avoids dividing by zero below.
        return isSignificant

    for r in range(len(rowSums)):
        for c in range(len(colSums)):
            expFreq = rowSums[r] * colSums[c] * 1.0 / nObservations
            if expFreq > 0:
                obsFreq = patternFreqs[2 * r + c] * 1.0
                chiSquare += (obsFreq - expFreq) ** 2 / expFreq
            else:
                if DEBUG:
                    print('{0}: Game {1} (left) vs. Game {2} (top)'.format(
                        formatType, pos1, pos2))
                    print('Cannot perform chi-square test of independence: expected frequency is 0.')
                return isSignificant

    if chiSquare >= 3.841:
        if outputFile is not None:
            outputFile.write('{0:02d} {1:02d} --- {2:>7.4f}\n'.format(
                pos1, pos2, chiSquare))
        isSignificant = 1

    if DEBUG:
        header = '{0}: Bits {1} and {2}'.format(formatType, pos1, pos2)
        chiSquareLine = 'c^2 = {:<6.4f}\n\n'.format(chiSquare)
        print(header)
        print(' | 0 | 1 || Total')
        print('--------------------------')
        print(' 0 | {:<3} | {:<3} || {:<3}'.format(patternFreqs[0], patternFreqs[1], rowSums[0]))
        print('--------------------------')
        print(' 1 | {:<3} | {:<3} || {:<3}'.format(patternFreqs[2], patternFreqs[3], rowSums[1]))
        print('--------------------------')
        print('--------------------------')
        # Grand total is the number of observations actually counted, which
        # is numBrackets * 4 only when pooling over regions.
        print('Total | {:<3} | {:<3} || {:<3}\n'.format(
            colSums[0], colSums[1], nObservations))
        print(chiSquareLine)

    return isSignificant