def playGenerationGames(agentList):
    """Play a round-robin tournament and return every agent's win rate.

    Every unordered pair of agents plays NUM_GAMES_PER_PAIR games against
    each other.  Colours are swapped from one game to the next so that
    neither agent of a pair always plays the same colour.

    Args:
        agentList: sequence of agents (e.g. lists of gene values).  Each
            entry is handed to playGame unchanged.

    Returns:
        A list with one win rate (wins / games played) per agent, in the
        same order as agentList.  An agent that played no games (a
        single-agent population, or NUM_GAMES_PER_PAIR == 0) gets 0.0
        instead of NaN.
    """
    numAgents = len(agentList)
    agentWinCount = np.zeros(numAgents)
    agentPlayCount = np.zeros(numAgents)

    for indexA in range(numAgents - 1):
        genesA = agentList[indexA]
        for indexB in range(indexA + 1, numAgents):
            genesB = agentList[indexB]

            for gameIndex in range(NUM_GAMES_PER_PAIR):
                # Alternate colours: agent A is white on even games.
                if gameIndex % 2 == 0:
                    colourA, colourB = 'w', 'b'
                else:
                    colourA, colourB = 'b', 'w'

                # The result is used arithmetically below: 1/True counts as
                # a win for black, 0/False as a win for white.
                blackWon = playGame(colourA, genesA, colourB, genesB, 1, DEBUG)
                agentPlayCount[indexA] += 1
                agentPlayCount[indexB] += 1

                if colourA == 'b':
                    agentWinCount[indexA] += blackWon
                    agentWinCount[indexB] += (1 - blackWon)
                else:
                    agentWinCount[indexB] += blackWon
                    agentWinCount[indexA] += (1 - blackWon)

    # Divide only where games were actually played; everything else keeps
    # the 0.0 from the pre-filled output array (avoids 0/0 -> NaN).
    agentWinRate = np.divide(agentWinCount,
                             agentPlayCount,
                             out=np.zeros(numAgents),
                             where=agentPlayCount > 0)

    return list(agentWinRate)
def getBaselineWinrate(baselineAgents, agent, numBaselineGames=30):
    """Return the fraction of games `agent` wins against the baseline agents.

    The agent plays numBaselineGames games against every baseline agent,
    swapping colours from one game to the next.

    Args:
        baselineAgents: sequence of baseline agents; each is passed to
            playGame unchanged.
        agent: the agent being evaluated.
        numBaselineGames: games played against each baseline agent
            (defaults to 30).

    Returns:
        Wins divided by total games played, or 0.0 when no games are played
        (empty baselineAgents or a non-positive numBaselineGames).
    """
    totalGames = numBaselineGames * len(baselineAgents)
    if totalGames <= 0:
        # Nothing to play; avoid dividing by zero below.
        return 0.0

    winCount = 0
    for baselineAgent in baselineAgents:
        for gameIndex in range(numBaselineGames):
            # Alternate colours: the evaluated agent is black on even games.
            if gameIndex % 2 == 0:
                colourBaseline, colourAgent = 'w', 'b'
            else:
                colourBaseline, colourAgent = 'b', 'w'

            # The result is used arithmetically: 1/True means black won.
            blackWon = playGame(colourBaseline, baselineAgent, colourAgent,
                                agent, 1, DEBUG)

            if colourAgent == 'b':
                winCount += blackWon
            else:
                winCount += (1 - blackWon)

    return winCount / totalGames
# Example no. 3
# 0
def main():
    """Run a round-robin tournament between the agents named in "agentList".

    Reads one agent filename per line from the file "agentList", loads each
    agent's gene file from AGENT_DIR, lets every pair of agents play
    NUM_GAMES_PER_PAIR games with alternating colours, and passes the
    resulting win rates to printWinRate.

    NOTE(review): "agentList" is opened relative to the working directory,
    while the gene files are opened under AGENT_DIR -- confirm this is
    intended.
    """
    with open("agentList") as f:
        agentList = f.read().splitlines()  # one agent filename per line

    # Load every gene file once, instead of re-reading both files for each
    # pairing.  Genes stay as the raw text lines, exactly as read.
    genesByAgent = []
    for agentFile in agentList:
        with open(AGENT_DIR + agentFile) as f:
            genesByAgent.append(f.read().splitlines())

    numAgents = len(agentList)
    agentWinCount = np.zeros(numAgents)
    agentPlayCount = np.zeros(numAgents)

    for indexA in range(numAgents - 1):
        genesA = genesByAgent[indexA]
        for indexB in range(indexA + 1, numAgents):
            genesB = genesByAgent[indexB]

            for gameIndex in range(NUM_GAMES_PER_PAIR):
                # Alternate colours: agent A is white on even games.
                if gameIndex % 2 == 0:
                    colourA, colourB = 'w', 'b'
                else:
                    colourA, colourB = 'b', 'w'

                # The result is used arithmetically: 1/True means black won.
                blackWon = playGame(colourA, genesA, colourB, genesB, 1, DEBUG)
                agentPlayCount[indexA] += 1
                agentPlayCount[indexB] += 1

                if colourA == 'b':
                    agentWinCount[indexA] += blackWon
                    agentWinCount[indexB] += (1 - blackWon)
                else:
                    agentWinCount[indexB] += blackWon
                    agentWinCount[indexA] += (1 - blackWon)

    # Agents without any games keep a win rate of 0.0 (avoids 0/0 -> NaN).
    agentWinRate = np.divide(agentWinCount,
                             agentPlayCount,
                             out=np.zeros(numAgents),
                             where=agentPlayCount > 0)

    printWinRate(agentList, agentWinRate)
def evolveAgents():
    """Evolve the agent population for a fixed number of generations.

    Loads the previous run's best agent from "FinalAgent" and the starting
    population from the gene files listed in AGENT_DIR + "agentList".
    Each generation then:

    1. plays a round-robin tournament via playGenerationGames;
    2. plays the tournament winner for 100 games against the previous run's
       best agent and records the number of wins (out of 100 games this is
       also the win percentage);
    3. builds the next population from NUM_CHILD_MUTATE children (gene-wise
       average of two parents, then mutateAgent), NUM_CHILD_RANDOM agents
       with random genes, and the NUM_CHILD_ELITE agents with the highest
       tournament win rates.

    Afterwards the agent with the highest score against the previous run's
    agent is written to "FinalAgent", and graphs of that score and of each
    gene of the per-generation winner are saved as PNG files.
    """
    numGenerations = 20

    # Load the last run's best agent to evaluate against.
    with open('FinalAgent', 'r') as f:
        priorFinalAgent = [float(i) for i in f.read().splitlines()]

    # A list of lists of floats, each inner list being one agent's genes.
    with open(AGENT_DIR + "agentList") as f:
        agentList = f.read().splitlines()  # one agent filename per line
    listOfGenes = []
    for agentFile in agentList:
        with open(AGENT_DIR + agentFile) as f2:
            listOfGenes.append([float(i) for i in f2.read().splitlines()])

    # NOTE(review): the result is not used in this function; the call is
    # kept in case createBaselineAgents has side effects -- confirm, then
    # remove.
    baselineAgents = createBaselineAgents()

    bestWinRate = 0
    bestAgent = []
    # A numpy RNG used for parent selection (and handed to mutateAgent).
    generator = np.random.default_rng()

    # Data recorded for graphing: one score per generation, and one row per
    # gene holding that gene's value for each generation's winner.
    allWinrates = [0] * numGenerations
    generationGenes = [[0] * numGenerations for _ in range(NUM_GENES)]

    for generationIndex in range(numGenerations):
        print("Beginning generation " + str(generationIndex))

        genWinRates = playGenerationGames(listOfGenes)

        # The agent with the best win rate inside this generation is the
        # one that gets scored against the previous run's best agent.
        bestIndex = genWinRates.index(max(genWinRates))
        genBestAgent = listOfGenes[bestIndex]

        genBestAgentWonGames = 0
        # Test the best agent in 100 games against last run's best.
        for x in range(100):
            # Alternate colours: the generation's best is white on even games.
            if x % 2 == 0:
                colourA, colourB = 'w', 'b'
            else:
                colourA, colourB = 'b', 'w'

            blackWon = playGame(colourA, genBestAgent, colourB,
                                priorFinalAgent, 1, DEBUG)

            if ((colourA == 'b' and blackWon)
                    or (colourA == 'w' and not blackWon)):
                genBestAgentWonGames += 1

        print("Best agent of generation " + str(generationIndex) +
              " has winrate " + str(genBestAgentWonGames) +
              "% against past baseline.")

        # The only scoring metric here is the score against the prior agent.
        if genBestAgentWonGames > bestWinRate:
            bestWinRate = genBestAgentWonGames
            bestAgent = copy.deepcopy(genBestAgent)

        # Record data for graphing: first the score, then each gene.
        allWinrates[generationIndex] = genBestAgentWonGames
        for index in range(NUM_GENES):
            generationGenes[index][generationIndex] = genBestAgent[index]

        # Build the next generation.
        normalizedList = normalizeList(genWinRates)
        nextGeneration = []

        # Children bred from two parents.
        for newChild in range(NUM_CHILD_MUTATE):
            # Choose 2 distinct parents, using normalizedList as the
            # selection probabilities.
            parents = generator.choice(listOfGenes, 2, False, normalizedList)
            # Each gene is the average of its parents' genes.
            child = [
                round((parents[0][gene] + parents[1][gene]) / 2, 3)
                for gene in range(NUM_GENES)
            ]

            # Mutate each child, potentially.
            mutateAgent(child, generator)
            nextGeneration.append(child)

        # Children with entirely random genes.
        for newChild in range(NUM_CHILD_RANDOM):
            # Each gene is drawn from [GENE_VAL_MIN, GENE_VAL_MAX).
            child = [
                (GENE_VAL_MAX - GENE_VAL_MIN) * np.random.random_sample() +
                GENE_VAL_MIN for gene in range(NUM_GENES)
            ]
            nextGeneration.append(child)

        # Elitism: carry over the NUM_CHILD_ELITE agents with the highest
        # win rates.  Sorting (rate, index) pairs keeps the original index.
        eliteIndices = sorted(((rate, i) for (i, rate) in enumerate(genWinRates)),
                              reverse=True)[:NUM_CHILD_ELITE]
        # Iterating the slice (rather than range(NUM_CHILD_ELITE)) is safe
        # even when the population is smaller than NUM_CHILD_ELITE.
        for _, eliteIndex in eliteIndices:
            nextGeneration.append(listOfGenes[eliteIndex])

        # The next generation is now complete.
        listOfGenes = nextGeneration

    # Save the best agent seen over all generations.  If no agent ever won a
    # game, bestAgent is still empty; keep the existing file in that case
    # rather than truncating it.
    if bestAgent:
        with open("FinalAgent", 'w') as f:
            for gene in bestAgent:
                f.write(str(gene) + '\n')
    print("Final agent discovered, winrate of " + str(bestWinRate) + "%")

    # Graph the recorded data, starting with the score per generation.
    xAxis = list(range(1, numGenerations + 1))
    ax = plotlib.subplots()[1]
    ax.xaxis.set_major_formatter(FormatStrFormatter('%1.0f'))
    plotlib.plot(xAxis, allWinrates)
    plotlib.xlabel('Generation')
    plotlib.ylabel('Best Observed Winrate')
    plotlib.title('Past Agent Baseline Winrate by Generation')
    plotlib.savefig('PastWinrate.png')

    # Now the evolution of each gene, one file per gene.
    for geneListIndex, yValues in enumerate(generationGenes):
        plotlib.clf()  # clear the figure to make a new file
        # clf() discards the axes created above, so the formatter has to be
        # set on the figure's current axes, not on the stale `ax`.
        # (Assumes plotlib is matplotlib.pyplot -- TODO confirm.)
        plotlib.gca().xaxis.set_major_formatter(FormatStrFormatter('%1.0f'))
        plotlib.plot(xAxis, yValues)
        plotlib.xlabel("Generation")
        plotlib.ylabel('Gene Value')
        plotlib.title('Gene ' + str(geneListIndex + 1) + ' by Generation')
        plotlib.savefig('PastGene' + str(geneListIndex + 1) + '.png')
def evolveAgents():
    """Evolve the agent population until improvement falls below a threshold.

    Loads the previous run's best agent from "FinalAgent" and the starting
    population from the gene files listed in AGENT_DIR + "agentList".
    Each generation then:

    1. plays a round-robin tournament via playGenerationGames and reports
       the winner's win rate inside the generation and against the static
       baseline agents;
    2. remembers the agent with the highest in-generation win rate seen so
       far;
    3. plays that remembered agent for 100 games against the previous run's
       best agent; if the win count improved over the last generation by at
       most WINRATE_THRESHOLD, writes the agent to "FinalAgent" and returns;
    4. otherwise builds the next population from NUM_CHILD_MUTATE bred
       children, NUM_CHILD_RANDOM random agents and the NUM_CHILD_ELITE
       agents with the highest tournament win rates.

    NOTE(review): the loop only stops on a small *positive* improvement; if
    the win count never improves it runs forever -- confirm this is intended.
    """
    # Load the last run's best agent to evaluate against.
    with open('FinalAgent', 'r') as f:
        priorFinalAgent = [float(i) for i in f.read().splitlines()]

    # A list of lists of floats, each inner list being one agent's genes.
    with open(AGENT_DIR + "agentList") as f:
        agentList = f.read().splitlines()  # one agent filename per line
    listOfGenes = []
    for agentFile in agentList:
        with open(AGENT_DIR + agentFile) as f2:
            listOfGenes.append([float(i) for i in f2.read().splitlines()])

    generationIndex = 0
    lastBaselineWinRate = 0

    baselineAgents = createBaselineAgents()

    bestWinRate = 0
    bestAgent = []
    # A numpy RNG used for parent selection (and handed to mutateAgent).
    generator = np.random.default_rng()
    while True:
        generationIndex += 1
        print("Beginning generation " + str(generationIndex))

        genWinRates = playGenerationGames(listOfGenes)

        # Win rate and genes of the best agent inside this generation (not
        # against the baseline); used to track the future final agent.
        genWinRate = max(genWinRates)
        genBestAgent = listOfGenes[genWinRates.index(genWinRate)]
        genWinRate = round(genWinRate * 100, 3)  # percentage for display

        print("Best agent of generation " + str(generationIndex) +
              " winrate of " + str(genWinRate) + "%" +
              " against the other agent of the same generation")

        baselineWinrate = getBaselineWinrate(baselineAgents, genBestAgent)
        print("Best agent of generation " + str(generationIndex) +
              " winrate of " + str(round(baselineWinrate * 100, ROUND_DIGIT)) +
              "%" + " against static baseline agents\n")

        # If it is better than the best agent so far, remember it.  Store a
        # copy so the saved genes do not alias a list in the population.
        if genWinRate > bestWinRate:
            bestWinRate = genWinRate
            bestAgent = list(genBestAgent)

        genBestAgentWonGames = 0
        # Test the best agent so far in 100 games against last run's best.
        for x in range(100):
            # Alternate colours: the tested agent is white on even games.
            if x % 2 == 0:
                colourA, colourB = 'w', 'b'
            else:
                colourA, colourB = 'b', 'w'

            blackWon = playGame(colourA, bestAgent, colourB, priorFinalAgent,
                                1, DEBUG)

            if ((colourA == 'b' and blackWon)
                    or (colourA == 'w' and not blackWon)):
                genBestAgentWonGames += 1

        # If the win count improved, but not by enough, end the process.
        # With 100 games the win count is directly the win percentage.
        improvement = genBestAgentWonGames - lastBaselineWinRate
        if 0 < improvement <= WINRATE_THRESHOLD:
            with open("FinalAgent", 'w') as f:
                for gene in bestAgent:
                    f.write(str(gene) + '\n')
            print("Final agent discovered, winrate of " +
                  str(genBestAgentWonGames) +
                  "% was below improvement threshold")
            return

        # Otherwise build a new generation.
        lastBaselineWinRate = genBestAgentWonGames
        normalizedList = normalizeList(genWinRates)
        nextGeneration = []

        # Children bred from two parents.
        for newChild in range(NUM_CHILD_MUTATE):
            # Choose 2 distinct parents, using normalizedList as the
            # selection probabilities.
            parents = generator.choice(listOfGenes, 2, False, normalizedList)
            # Each gene is the average of its parents' genes.
            child = [
                round((parents[0][gene] + parents[1][gene]) / 2, 3)
                for gene in range(NUM_GENES)
            ]

            # Mutate each child, potentially.
            mutateAgent(child, generator)
            nextGeneration.append(child)

        # Children with entirely random genes.
        for newChild in range(NUM_CHILD_RANDOM):
            # Each gene is drawn from [GENE_VAL_MIN, GENE_VAL_MAX).
            child = [
                (GENE_VAL_MAX - GENE_VAL_MIN) * np.random.random_sample() +
                GENE_VAL_MIN for gene in range(NUM_GENES)
            ]
            nextGeneration.append(child)

        # Elitism: carry over the NUM_CHILD_ELITE agents with the highest
        # win rates.  Sorting (rate, index) pairs keeps the original index.
        eliteIndices = sorted(((rate, i) for (i, rate) in enumerate(genWinRates)),
                              reverse=True)[:NUM_CHILD_ELITE]
        # Iterating the slice (rather than range(NUM_CHILD_ELITE)) is safe
        # even when the population is smaller than NUM_CHILD_ELITE.
        for _, eliteIndex in eliteIndices:
            nextGeneration.append(listOfGenes[eliteIndex])

        # The next generation is now complete.
        listOfGenes = nextGeneration