Example no. 1
import neat

import game


def eval_against_all(genomes, config):
    # Fitness: wins minus losses over 50 games against each of three
    # opponents (a random player and two look-ahead players, strengths 1 and 5).
    for genome_id, genome in genomes:
        model = neat.nn.FeedForwardNetwork.create(genome, config)
        genome.fitness = 0.0
        sizeX = 5
        sizeY = 5
        for oppConstr, oppStrength in [(game.RandomPlayer, 0),
                                       (game.AILook1Player, 1),
                                       (game.AILook1Player, 5)]:
            for episode in range(50):
                board = game.Board(sizeX, sizeY)
                # RandomPlayer takes neither a board nor a strength argument
                if oppConstr is game.RandomPlayer:
                    board.setPlayers(
                        game.AITrainingReduFeatPlayer("1", board, model, 2),
                        oppConstr("2"))
                else:
                    board.setPlayers(
                        game.AITrainingReduFeatPlayer("1", board, model, 2),
                        oppConstr("2", board, oppStrength))
                board.startGameOnDifferentEnds()
                result = board.play(False)
                # +1 for a win of player 1, -1 for a loss, 0 otherwise
                if result == 1:
                    genome.fitness += 1.0
                elif result == 2:
                    genome.fitness -= 1.0
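
For context, eval_against_all has the (genomes, config) signature that neat-python's Population.run expects of a fitness function. A minimal driver sketch follows; the config file name and the generation count are placeholders, not taken from this listing:

import neat

config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     "neat_config.txt")  # hypothetical config file
population = neat.Population(config)
# evolve for 100 generations, scoring every genome with eval_against_all
winner = population.run(eval_against_all, 100)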
import random

import numpy as np
from keras.layers import Dense
from keras.models import Sequential, clone_model
from keras.optimizers import RMSprop


def trainModelWithDMC(p_boardSizeX,
                      p_boardSizeY,
                      p_episodes,
                      savePath,
                      p_temperature=20.0,
                      p_gamma=0.9,
                      howFar=3,
                      rewardFunc=game.rewardFunc2,
                      selfPlayFixOpp=False,
                      startingModel=None,
                      opponentConstr=None,
                      opponentStrength=None):

    model = startingModel

    # build a fresh network unless a pre-trained model was supplied
    if startingModel is None:
        model = Sequential()
        # input: 4 global features plus a (2*howFar+1) x (2*howFar+1)
        # local window around the player
        model.add(
            Dense(70,
                  kernel_initializer='lecun_uniform',
                  activation='relu',
                  input_shape=(4 + (2 * howFar + 1) * (2 * howFar + 1), )))

        model.add(
            Dense(35, kernel_initializer='lecun_uniform', activation='relu'))

        # one linear Q-value output per action in {-1, 0, 1}
        model.add(
            Dense(3, kernel_initializer='lecun_uniform', activation='linear'))

        model.compile(loss='mean_squared_error', optimizer=RMSprop())

    episodes = p_episodes
    gamma = p_gamma
    startingTemperature = p_temperature
    temperature = p_temperature
    # with batchSize = buffer = 1 the experience replay degenerates to an
    # online update on the most recent transition
    batchSize = 1
    buffer = 1
    replay = []
    h = 0
    updateStep = 0
    for i in range(episodes):

        rewardList = []
        stateList = []
        actionList = []

        # initialise the board
        board = game.Board(p_boardSizeX, p_boardSizeY)

        # set the opponent
        if opponentConstr is None:
            if selfPlayFixOpp:
                # self-play against a frozen copy of the current model
                oppModel = clone_model(model)
                oppModel.set_weights(model.get_weights())
                board.setPlayers(
                    game.AITrainingReduFeatPlayer("1", board, model, howFar),
                    game.AITrainingReduFeatPlayer("2", board, oppModel,
                                                  howFar))
            else:
                # self-play: both players share the live model
                board.setPlayers(
                    game.AITrainingReduFeatPlayer("1", board, model, howFar),
                    game.AITrainingReduFeatPlayer("2", board, model, howFar))
        else:
            # external opponent; some constructors take no board or strength
            try:
                opp = opponentConstr("2", board, opponentStrength)
            except TypeError:
                opp = opponentConstr("2")
            board.setPlayers(
                game.AITrainingReduFeatPlayer("1", board, model, howFar), opp)

        # set start strategy
        board.startGameWithPseudoRandomStartPositions()
        while board.checkGameStatus() == 0:

            # Boltzmann (softmax) action selection over the Q-values
            board.player1.getDirection()  # fills board.player1.vals
            Qprobs = game.softmax(board.player1.vals / temperature)
            # sample an action index directly from the probabilities;
            # subtracting 1 maps the indices {0, 1, 2} onto actions {-1, 0, 1}
            action = np.random.choice(len(Qprobs[0]), p=Qprobs[0]) - 1
            actionList.append(action)

            # record the current state S before taking the action
            state = board.to01ReducedFeatures(board.player1, howFar).reshape(
                1, 4 + (2 * howFar + 1) * (2 * howFar + 1))
            stateList.append(state)

            # take the action; the opponent moves according to its own policy
            board.movePlayers(action, board.player2.getDirection())

            gameStatus = board.checkGameStatus()

            # observe the reward for the resulting state
            reward = rewardFunc(gameStatus)
            rewardList.append(reward)

        # Monte Carlo targets: discounted accumulated reward for every step
        dAC = discountedAccRewards(rewardList, gamma)
        for stepNum in range(len(rewardList)):

            if len(replay) < buffer:
                replay.append(
                    (stateList[stepNum], actionList[stepNum], dAC[stepNum]))
            else:
                # overwrite the oldest slot of the circular buffer
                if h < (buffer - 1):
                    h += 1
                else:
                    h = 0
                replay[h] = (stateList[stepNum], actionList[stepNum],
                             dAC[stepNum])
                # randomly sample from the experience replay memory
                minibatch = random.sample(replay, batchSize)
                X_train = []
                y_train = []

                for memory in minibatch:
                    state, action, accReward = memory
                    # regress only the taken action's Q-value towards the
                    # observed return; the other outputs keep their values
                    Qvals = model.predict(state, batch_size=1)
                    y = np.zeros((1, 3))
                    y[:] = Qvals[:]
                    y[0][action + 1] = accReward  # action + 1 because actions are -1, 0, 1
                    X_train.append(
                        state.reshape(4 + (2 * howFar + 1) * (2 * howFar + 1)))
                    y_train.append(y.reshape(3, ))

                X_train = np.array(X_train)
                y_train = np.array(y_train)
                print("Game #: %s" % (i, ))
                model.fit(X_train,
                          y_train,
                          batch_size=batchSize,
                          epochs=1,
                          verbose=1)
                updateStep += 1
        # periodic checkpoint
        if i % 10000 == 0:
            model.save(savePath)
        # linear temperature annealing, clamped at 1.0 (greedier play over time)
        if temperature > 1.0:
            temperature -= (startingTemperature / episodes)
        else:
            temperature = 1.0
    model.save(savePath)
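
The helper discountedAccRewards is not shown in this listing. A minimal sketch consistent with how it is used above, assuming it returns the discounted return G_t = r_t + gamma * r_{t+1} + ... for every step t of the episode:

import numpy as np

def discountedAccRewards(rewards, gamma):
    # walk backwards so that G_t = r_t + gamma * G_{t+1}
    out = np.zeros(len(rewards))
    acc = 0.0
    for t in reversed(range(len(rewards))):
        acc = rewards[t] + gamma * acc
        out[t] = acc
    return out

A typical call, with placeholder arguments: trainModelWithDMC(5, 5, 100000, "dmc_model.h5", selfPlayFixOpp=True).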