def playGenerationGames(agentList):
    """Play a round-robin between all agents and return their win rates.

    Every unordered pair of agents plays NUM_GAMES_PER_PAIR games. Colours
    alternate each game: the lower-indexed agent of the pair is white on
    even-numbered games and black on odd-numbered ones.

    Args:
        agentList: sequence of agents; each entry is handed to playGame
            unchanged as that player's genes.

    Returns:
        A list with one entry per agent: games won divided by games played.
    """
    numAgents = len(agentList)
    wins = np.zeros(numAgents)    # games won, per agent
    played = np.zeros(numAgents)  # games played, per agent

    for first in range(numAgents - 1):
        genesFirst = agentList[first]
        for second in range(first + 1, numAgents):
            genesSecond = agentList[second]
            for gameNumber in range(NUM_GAMES_PER_PAIR):
                # swap sides every game
                if gameNumber % 2 == 0:
                    colourFirst, colourSecond = 'w', 'b'
                else:
                    colourFirst, colourSecond = 'b', 'w'

                blackWon = playGame(colourFirst, genesFirst, colourSecond,
                                    genesSecond, 1, DEBUG)

                played[first] += 1
                played[second] += 1

                # playGame's result is credited to whoever held black,
                # and its complement to whoever held white
                if colourFirst == 'b':
                    blackIndex, whiteIndex = first, second
                else:
                    blackIndex, whiteIndex = second, first
                wins[blackIndex] += blackWon
                wins[whiteIndex] += (1 - blackWon)

    return [won / games for won, games in zip(wins, played)]
def getBaselineWinrate(baselineAgents, agent, numBaselineGames=30):
    """Return the fraction of games `agent` wins against the baseline agents.

    The agent plays `numBaselineGames` games against every baseline agent,
    alternating colours each game (the agent is black on even-numbered games
    and white on odd-numbered ones).

    Args:
        baselineAgents: sequence of baseline agents, each passed to playGame
            unchanged as the opponent's genes.
        agent: the genes of the agent being evaluated.
        numBaselineGames: games to play against each baseline agent
            (defaults to 30, the value that used to be hard-coded).

    Returns:
        Games won by `agent` divided by the total number of games played,
        or 0.0 when no games would be played (no baseline agents or a
        non-positive game count) instead of dividing by zero.
    """
    totalGames = numBaselineGames * len(baselineAgents)
    if totalGames <= 0:
        # nothing to play: avoid the ZeroDivisionError below
        return 0.0

    winCount = 0
    for baselineAgent in baselineAgents:
        for i in range(numBaselineGames):
            # swap sides every game
            if i % 2 == 0:
                colourBaseline, colourAgent = 'w', 'b'
            else:
                colourBaseline, colourAgent = 'b', 'w'

            blackWon = playGame(colourBaseline, baselineAgent, colourAgent,
                                agent, 1, DEBUG)

            # credit the agent with the result for whichever colour it held
            if colourAgent == 'b':
                winCount += blackWon
            else:
                winCount += (1 - blackWon)

    return winCount / totalGames
def main():
    """Run a round-robin tournament between the agents named in "agentList".

    Reads the agent file names from "agentList", loads each agent's gene file
    from AGENT_DIR, plays every pair against each other via
    playGenerationGames and reports the resulting win rates with
    printWinRate.
    """
    # NOTE(review): this opens "agentList" relative to the working directory,
    # while evolveAgents opens AGENT_DIR + "agentList" -- confirm which is intended.
    with open("agentList") as f:
        agentList = f.read().splitlines()  # one agent file name per line

    # Load every gene file exactly once, instead of re-reading both files
    # for each pair inside the tournament loop. The genes are kept as the
    # raw text lines of the file, as before.
    agentGenes = []
    for agentFile in agentList:
        with open(AGENT_DIR + agentFile) as f:
            agentGenes.append(f.read().splitlines())

    # playGenerationGames runs the same pairwise loop that used to be
    # duplicated here; convert back to an array so printWinRate still
    # receives a numpy array.
    agentWinRate = np.asarray(playGenerationGames(agentGenes))

    printWinRate(agentList, agentWinRate)
def evolveAgents():
    """Evolve the agent population for a fixed 20 generations and graph it.

    Loads the previous run's best agent from "FinalAgent" and the starting
    population from the files listed in AGENT_DIR + "agentList". Each
    generation:
      * plays a round-robin (playGenerationGames) to get every agent's
        win rate within the generation;
      * plays the generation's top agent 100 games against the prior final
        agent and remembers the agent with the most wins seen so far;
      * builds the next generation from NUM_CHILD_MUTATE averaged-and-mutated
        children, NUM_CHILD_RANDOM random agents and the NUM_CHILD_ELITE
        highest-rated agents of the current generation.

    Afterwards the best agent is written to "FinalAgent" and one PNG graph is
    saved for the win rate plus one per gene.
    """
    numGenerations = 20
    # with exactly 100 games the number of wins doubles as a percentage
    numPriorGames = 100

    #load the last run's best agent to evaluate against
    with open('FinalAgent', 'r') as f:
        priorFinalAgent = [float(i) for i in f.read().splitlines()]

    #a list of lists of floats, each list corresponding to an agent
    listOfGenes = []
    with open(AGENT_DIR + "agentList") as f:
        agentList = f.read().splitlines()  # agent file names, one per line
    for agentFile in agentList:
        with open(AGENT_DIR + agentFile) as f2:
            #read in the genes as a list and convert each entry to a float
            listOfGenes.append([float(i) for i in f2.read().splitlines()])

    # NOTE(review): the result is unused in this variant; the call is kept
    # in case createBaselineAgents has side effects -- confirm and remove.
    baselineAgents = createBaselineAgents()

    bestWinRate = 0
    bestAgent = []

    # a numpy RNG used for parent selection
    generator = np.random.default_rng()

    #this is for the purposes of graphing
    allWinrates = [0] * numGenerations
    # one list of per-generation values for every gene; sized by NUM_GENES
    # so it always matches the range(NUM_GENES) loop that fills it
    generationGenes = [[0] * numGenerations for _ in range(NUM_GENES)]

    for generationIndex in range(numGenerations):
        print("Beginning generation " + str(generationIndex))
        genWinRates = playGenerationGames(listOfGenes)

        #this is the true winrate in the generation, not against the prior agent
        #we use it to choose the agent to score VS the prior final agent
        bestIndex = genWinRates.index(max(genWinRates))
        genBestAgent = listOfGenes[bestIndex]

        #test the best agent against last trial's best
        genBestAgentWonGames = 0
        for x in range(numPriorGames):
            # set player colour, swapping sides every game
            if x % 2 == 0:
                colourA, colourB = 'w', 'b'
            else:
                colourA, colourB = 'b', 'w'
            blackWon = playGame(colourA, genBestAgent, colourB,
                                priorFinalAgent, 1, DEBUG)
            if ((colourA == 'b' and blackWon)
                    or (colourA == 'w' and not blackWon)):
                genBestAgentWonGames += 1

        print("Best agent of generation " + str(generationIndex) +
              " has winrate " + str(genBestAgentWonGames) +
              "% against past baseline.")

        #the only scoring metric is the winrate vs the prior final agent
        if genBestAgentWonGames > bestWinRate:
            bestWinRate = genBestAgentWonGames
            bestAgent = copy.deepcopy(genBestAgent)

        #record data for the purposes of graphing
        allWinrates[generationIndex] = genBestAgentWonGames
        for index in range(NUM_GENES):
            generationGenes[index][generationIndex] = genBestAgent[index]

        #build the next generation
        normalizedList = normalizeList(genWinRates)
        nextGeneration = []

        # generate children from parents
        for newChild in range(NUM_CHILD_MUTATE):
            #choose 2 parents without replacement, weighted by normalizedList
            parents = generator.choice(listOfGenes, 2, False, normalizedList)
            #each gene is the average of its parents
            child = [round((parents[0][gene] + parents[1][gene]) / 2, 3)
                     for gene in range(NUM_GENES)]
            #mutate each child, potentially
            mutateAgent(child, generator)
            nextGeneration.append(child)

        # generate new children with random genes in [GENE_VAL_MIN, GENE_VAL_MAX)
        for newChild in range(NUM_CHILD_RANDOM):
            child = [0] * NUM_GENES
            for gene in range(NUM_GENES):
                child[gene] = ((GENE_VAL_MAX - GENE_VAL_MIN) *
                               np.random.random_sample() + GENE_VAL_MIN)
            nextGeneration.append(child)

        #elitism: carry over the NUM_CHILD_ELITE best members of this generation
        #sorting (winrate, index) pairs yields the original indices of the best
        eliteIndices = sorted([(x, i) for (i, x) in enumerate(genWinRates)],
                              reverse=True)[:NUM_CHILD_ELITE]
        for _, eliteIndex in eliteIndices:
            nextGeneration.append(listOfGenes[eliteIndex])

        #the next generation is now complete
        listOfGenes = nextGeneration

    #save best of all time agent
    #skip the write when no agent ever won a game, so the existing
    #FinalAgent file is not replaced by an empty one
    if bestAgent:
        with open("FinalAgent", 'w') as f:
            for gene in bestAgent:
                f.write(str(gene) + '\n')
    print("Final agent discovered, winrate of " + str(bestWinRate) + "%")

    #now we want to make graphs of all our data
    #first graph the winrates
    xAxis = list(range(1, numGenerations + 1))
    ax = plotlib.subplots()[1]
    ax.xaxis.set_major_formatter(FormatStrFormatter('%1.0f'))
    plotlib.plot(xAxis, allWinrates)
    plotlib.xlabel('Generation')
    plotlib.ylabel('Best Observed Winrate')
    plotlib.title('Past Agent Baseline Winrate by Generation')
    plotlib.savefig('PastWinrate.png')

    #now the evolution of each gene
    for geneListIndex in range(NUM_GENES):
        plotlib.clf()  #clear the plot to make a new file
        # clf() removes the axes created above from the figure, so the tick
        # formatter has to be applied to the fresh current axes instead of
        # the stale `ax` (plotlib is presumably matplotlib.pyplot)
        plotlib.gca().xaxis.set_major_formatter(FormatStrFormatter('%1.0f'))
        plotlib.plot(xAxis, generationGenes[geneListIndex])
        plotlib.xlabel("Generation")
        plotlib.ylabel('Gene Value')
        plotlib.title('Gene ' + str(geneListIndex + 1) + ' by Generation')
        plotlib.savefig('PastGene' + str(geneListIndex + 1) + '.png')
def evolveAgents():
    """Evolve the agent population until improvement falls below a threshold.

    Loads the previous run's best agent from "FinalAgent" and the starting
    population from the files listed in AGENT_DIR + "agentList". Each
    generation:
      * plays a round-robin (playGenerationGames) to get every agent's
        win rate within the generation;
      * reports the top agent's win rate against the static baseline agents;
      * keeps the agent with the highest in-generation win rate seen so far
        and plays it 100 games against the prior final agent;
      * stops, writing that agent to "FinalAgent", when its wins against the
        prior final agent rose compared with the previous generation but by
        no more than WINRATE_THRESHOLD;
      * otherwise builds the next generation from NUM_CHILD_MUTATE
        averaged-and-mutated children, NUM_CHILD_RANDOM random agents and the
        NUM_CHILD_ELITE highest-rated agents of the current generation.

    The loop has no generation cap; it only ends through the stop condition.
    """
    # with exactly 100 games the number of wins doubles as a percentage
    numPriorGames = 100

    #load the last run's best agent to evaluate against
    with open('FinalAgent', 'r') as f:
        priorFinalAgent = [float(i) for i in f.read().splitlines()]

    #a list of lists of floats, each list corresponding to an agent
    listOfGenes = []
    with open(AGENT_DIR + "agentList") as f:
        agentList = f.read().splitlines()  # agent file names, one per line
    for agentFile in agentList:
        with open(AGENT_DIR + agentFile) as f2:
            #read in the genes as a list and convert each entry to a float
            listOfGenes.append([float(i) for i in f2.read().splitlines()])

    generationIndex = 0
    # wins (out of 100) against the prior final agent in the last generation
    lastBaselineWinRate = 0
    baselineAgents = createBaselineAgents()

    bestWinRate = 0
    bestAgent = []

    # a numpy RNG used for parent selection
    generator = np.random.default_rng()

    while True:
        generationIndex += 1
        print("Beginning generation " + str(generationIndex))
        genWinRates = playGenerationGames(listOfGenes)

        #get the win rate and index of the best agent
        #this is the true winrate in the generation, not against baseline
        #we use this to track what will become our new final agent
        genWinRate = max(genWinRates)
        bestIndex = genWinRates.index(genWinRate)
        genWinRate = round(genWinRate * 100, 3)  #make it nicely formatted now
        genBestAgent = listOfGenes[bestIndex]
        print("Best agent of generation " + str(generationIndex) +
              " winrate of " + str(genWinRate) + "%" +
              " against the other agent of the same generation")

        # the same top agent is scored against the static baseline agents
        baselineWinrate = getBaselineWinrate(baselineAgents, genBestAgent)
        print("Best agent of generation " + str(generationIndex) +
              " winrate of " + str(round(baselineWinrate * 100, ROUND_DIGIT)) +
              "%" + " against static baseline agents\n")

        #if its better than the last best agent, save it
        if genWinRate > bestWinRate:
            bestWinRate = genWinRate
            bestAgent = genBestAgent

        #test the best agent against last trial's best
        genBestAgentWonGames = 0
        for x in range(numPriorGames):
            # set player colour, swapping sides every game
            if x % 2 == 0:
                colourA, colourB = 'w', 'b'
            else:
                colourA, colourB = 'b', 'w'
            blackWon = playGame(colourA, bestAgent, colourB, priorFinalAgent,
                                1, DEBUG)
            if ((colourA == 'b' and blackWon)
                    or (colourA == 'w' and not blackWon)):
                genBestAgentWonGames += 1

        #if the winrate improved, but not by enough, end the process
        #since we do 100 games, the won game count is directly the winrate
        improvement = genBestAgentWonGames - lastBaselineWinRate
        if (genBestAgentWonGames > lastBaselineWinRate
                and improvement <= WINRATE_THRESHOLD):
            #stop now; `with` closes the file even if a write fails
            with open("FinalAgent", 'w') as f:
                for gene in bestAgent:
                    f.write(str(gene) + '\n')
            print("Final agent discovered, winrate of " +
                  str(genBestAgentWonGames) +
                  "% was below improvement threshold")
            return

        #else, new generation
        lastBaselineWinRate = genBestAgentWonGames
        normalizedList = normalizeList(genWinRates)
        nextGeneration = []

        # generate children from parents
        for newChild in range(NUM_CHILD_MUTATE):
            #choose 2 parents without replacement, weighted by normalizedList
            parents = generator.choice(listOfGenes, 2, False, normalizedList)
            #each gene is the average of its parents
            child = [round((parents[0][gene] + parents[1][gene]) / 2, 3)
                     for gene in range(NUM_GENES)]
            #mutate each child, potentially
            mutateAgent(child, generator)
            nextGeneration.append(child)

        # generate new children with random genes in [GENE_VAL_MIN, GENE_VAL_MAX)
        for newChild in range(NUM_CHILD_RANDOM):
            child = [0] * NUM_GENES
            for gene in range(NUM_GENES):
                child[gene] = ((GENE_VAL_MAX - GENE_VAL_MIN) *
                               np.random.random_sample() + GENE_VAL_MIN)
            nextGeneration.append(child)

        #elitism: carry over the NUM_CHILD_ELITE best members of this generation
        #sorting (winrate, index) pairs yields the original indices of the best
        eliteIndices = sorted([(x, i) for (i, x) in enumerate(genWinRates)],
                              reverse=True)[:NUM_CHILD_ELITE]
        for _, eliteIndex in eliteIndices:
            nextGeneration.append(listOfGenes[eliteIndex])

        #the next generation is now complete
        listOfGenes = nextGeneration