import neat

import game


def eval_against_all(genomes, config):
    """NEAT fitness evaluation: play each genome against a pool of scripted opponents."""
    for genome_id, genome in genomes:
        model = neat.nn.FeedForwardNetwork.create(genome, config)
        genome.fitness = 0.0
        sizeX = 6
        sizeY = 6
        # Opponents as (constructor, strength); strength is unused for RandomPlayer.
        for opp in [(game.RandomPlayer, 0), (game.AILook1Player, 1),
                    (game.AILook1Player, 5)]:
            for episode in range(0, 50):
                board = game.Board(sizeX, sizeY)
                if opp[0].__name__ == "RandomPlayer":
                    board.setPlayers(
                        game.AITrainingReduFeatWOPlayer("1", board, model, 2),
                        opp[0]("2"))
                else:
                    board.setPlayers(
                        game.AITrainingReduFeatWOPlayer("1", board, model, 2),
                        opp[0]("2", board, opp[1]))
                board.startGameWithPseudoRandomStartPositions()
                result = board.play(False)
                # +1 fitness for a win of player 1, -1 for a loss, 0 otherwise.
                if result == 1:
                    genome.fitness += 1.0
                elif result == 2:
                    genome.fitness -= 1.0
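# A minimal sketch of how eval_against_all could be plugged into a neat-python run.
# The config path is borrowed from the evaluation scripts below; the reporter and the
# generation count are illustrative assumptions, not the project's actual training setup.
if __name__ == '__main__':
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         'training/config-neat-RF3.txt')
    population = neat.Population(config)
    population.add_reporter(neat.StdOutReporter(True))
    winner = population.run(eval_against_all, 100)  # 100 generations, assumed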
import random

import numpy as np
from keras.layers import Dense
from keras.models import Sequential, clone_model
from keras.optimizers import RMSprop

import game


def trainModelWithDMC(p_boardSizeX,
                      p_boardSizeY,
                      p_episodes,
                      savePath,
                      p_temperature=20.0,
                      p_gamma=0.9,
                      howFar=2,
                      rewardFunc=game.rewardFunc2,
                      selfPlayFixOpp=False,
                      startingModel=None,
                      opponentConstr=None,
                      opponentStrength=None):
    """Deep Monte Carlo training: fit Q-values towards the discounted accumulated episode rewards."""
    model = startingModel
    if startingModel is None:
        model = Sequential()
        model.add(
            Dense(70,
                  kernel_initializer='lecun_uniform',
                  activation='relu',
                  input_shape=(2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)), )))
        model.add(
            Dense(35, kernel_initializer='lecun_uniform', activation='relu'))
        model.add(
            Dense(3, kernel_initializer='lecun_uniform', activation='linear'))
        model.compile(loss='mean_squared_error', optimizer=RMSprop())

    episodes = p_episodes
    gamma = p_gamma
    startingTemperature = p_temperature
    temperature = p_temperature
    batchSize = 1
    buffer = 1
    replay = []
    h = 0
    updateStep = 0

    for i in range(episodes):
        rewardList = []
        stateList = []
        actionList = []

        # init board
        board = game.Board(p_boardSizeX, p_boardSizeY)

        # set opponent
        if opponentConstr is None:
            if selfPlayFixOpp:
                # self-play against a frozen copy of the current model
                oppModel = clone_model(model)
                oppModel.set_weights(model.get_weights())
                board.setPlayers(
                    game.AITrainingReduFeatWOPlayer("1", board, model, howFar),
                    game.AITrainingReduFeatWOPlayer("2", board, oppModel,
                                                    howFar))
            else:
                board.setPlayers(
                    game.AITrainingReduFeatWOPlayer("1", board, model, howFar),
                    game.AITrainingReduFeatWOPlayer("2", board, model, howFar))
        else:
            try:
                opp = opponentConstr("2", board, opponentStrength)
            except TypeError:
                opp = opponentConstr("2")
            board.setPlayers(
                game.AITrainingReduFeatWOPlayer("1", board, model, howFar),
                opp)

        # set start strategy
        board.startGameWithPseudoRandomStartPositions()

        while board.checkGameStatus() == 0:
            # Boltzmann action selection: querying the network fills player1.vals
            # with Q-value estimates for the three actions.
            board.player1.getDirection()
            Qprobs = game.softmax(board.player1.vals / temperature)
            action_value = np.random.choice(Qprobs[0], p=Qprobs[0])
            # Map the sampled value back to its index, then shift {0,1,2} to moves {-1,0,1}.
            action = np.argmax(Qprobs[0] == action_value) - 1
            actionList.append(action)

            # Record the state in which the action is taken, then apply the move.
            state = board.to01ReducedFeaturesWithOpponent(
                board.player1, board.player2, howFar).reshape(
                    1, 2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)))
            stateList.append(state)
            board.movePlayers(action, board.player2.getDirection())
            gameStatus = board.checkGameStatus()

            # Observe reward
            reward = rewardFunc(gameStatus)
            rewardList.append(reward)

        # Episode finished: compute the discounted accumulated reward for every step.
        dAC = discountedAccRewards(rewardList, gamma)
        for stepNum in range(len(rewardList)):
            if len(replay) < buffer:
                replay.append(
                    (stateList[stepNum], actionList[stepNum], dAC[stepNum]))
            else:
                if h < (buffer - 1):
                    h += 1
                else:
                    h = 0
                replay[h] = (stateList[stepNum], actionList[stepNum],
                             dAC[stepNum])

                # randomly sample our experience replay memory
                minibatch = random.sample(replay, batchSize)
                X_train = []
                y_train = []
                for memory in minibatch:
                    state, action, accReward = memory
                    Qvals = model.predict(state, batch_size=1)
                    y = np.zeros((1, 3))
                    y[:] = Qvals[:]
                    y[0][action + 1] = accReward  # action + 1 because actions are -1,0,1
                    X_train.append(
                        state.reshape(
                            2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)), ))
                    y_train.append(y.reshape(3, ))
                X_train = np.array(X_train)
                y_train = np.array(y_train)

                print("Game #: %s" % (i, ))
                model.fit(X_train,
                          y_train,
                          batch_size=batchSize,
                          epochs=1,
                          verbose=1)
                updateStep += 1

        # Periodic checkpoint and temperature decay at the end of each episode.
        if i % 10000 == 0:
            model.save(savePath)
        if temperature > 1.0:
            temperature -= (startingTemperature / episodes)
        else:
            temperature = 1.0

    model.save(savePath)
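# trainModelWithDMC calls a discountedAccRewards helper that is not shown in this listing.
# Below is a minimal sketch of what it is assumed to compute: the discounted return
# G_t = r_t + gamma * r_{t+1} + gamma^2 * r_{t+2} + ... for every step of the episode,
# returned as a list aligned with rewardList.
def discountedAccRewards(rewards, gamma):
    discounted = [0.0] * len(rewards)
    acc = 0.0
    # Walk backwards so each step accumulates the discounted rewards that follow it.
    for t in reversed(range(len(rewards))):
        acc = rewards[t] + gamma * acc
        discounted[t] = acc
    return discounted


# Hypothetical invocation; the save path, episode count, and opponent choice are
# illustrative assumptions, not the settings used for the saved models in this repo.
# trainModelWithDMC(6, 6, 50000, 'training/resultsPSR/modelDMC',
#                   opponentConstr=game.AILook1Player, opponentStrength=1)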
from keras.models import load_model

import game
from util import loadNEATmodel

# Evaluate a saved model against a scripted opponent over a batch of games.
# Outcome counts are keyed by the board.play() result code
# (1 = player 1 wins, 2 = player 2 wins, 3 = draw).
gameStats = {1: 0, 2: 0, 3: 0}
for episode in range(0, 60):
    board = game.Board(6, 6)
    #board.setPlayers(game.AITrainingReduFeatPlayer("1",board,loadNEATmodel('training/resultsPSR/NEAT/RF3-PSR','training/config-neat-RF3.txt'),3),game.AILook1Player("2",board,5))
    board.setPlayers(
        game.AITrainingReduFeatWOPlayer(
            "1", board,
            load_model('training/resultsPSR/opp/modelSelfPlayBig2'), 2),
        game.AILook1Player("2", board, 5))  # game.RandomPlayer("2")
    board.startGameWithPseudoRandomStartPositions()
    result = board.play(False)
    print("Ends with: ", result)
    gameStats[result] += 1
    print("Game: ", episode)
print(gameStats)
print('5')
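# Optional summary of the tallies above, assuming the result codes listed in the
# comment at the top of this script (1 = win, 2 = loss, 3 = draw for player 1).
totalGames = sum(gameStats.values())
print("Win rate:  %.1f%%" % (100.0 * gameStats[1] / totalGames))
print("Loss rate: %.1f%%" % (100.0 * gameStats[2] / totalGames))
print("Draw rate: %.1f%%" % (100.0 * gameStats[3] / totalGames))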
from keras.models import load_model

import game
from util import loadNEATmodel

# Use the AITrainingPlayer variant that matches your model.
board = game.Board(6, 6)
#board.setPlayers(game.AITrainingPlayer("1",board,loadNEATmodel('training/resultsPSR/NEAT/RF3-PSR','training/config-neat-RF3.txt')),game.AILook1Player("2",board,1))
board.setPlayers(
    game.AITrainingReduFeatWOPlayer(
        "1", board, load_model('training/resultsPSR/techn/modelDQLF'), 2),
    game.HumanPlayer("2"))
#board.setPlayers(game.HumanPlayer("1"),game.HumanPlayer("2"))
board.startGameWithPseudoRandomStartPositions()
print("Game ends with:", board.play(True))
import random

import numpy as np
from keras.layers import Dense
from keras.models import Sequential, clone_model
from keras.optimizers import RMSprop

import game


def trainModelWithDQL(p_boardSizeX,
                      p_boardSizeY,
                      p_episodes,
                      savePath,
                      p_temperature=20.0,
                      p_gamma=0.9,
                      fixedTargetSteps=500,
                      howFar=2,
                      rewardFunc=game.rewardFunc2,
                      selfPlayFixOpp=False,
                      startingModel=None,
                      opponentConstr=None,
                      opponentStrength=None):
    """Deep Q-learning with experience replay and a periodically refreshed target network."""
    model = startingModel
    if startingModel is None:
        model = Sequential()
        model.add(
            Dense(70,
                  kernel_initializer='lecun_uniform',
                  activation='relu',
                  input_shape=(2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)), )))
        model.add(
            Dense(35, kernel_initializer='lecun_uniform', activation='relu'))
        model.add(
            Dense(3, kernel_initializer='lecun_uniform', activation='linear'))
        model.compile(loss='mean_squared_error', optimizer=RMSprop())

    episodes = p_episodes
    gamma = p_gamma
    startingTemperature = p_temperature
    temperature = p_temperature
    batchSize = 50
    buffer = 500
    replay = []
    h = 0
    targetModel = None
    updateStep = 0

    for i in range(episodes):
        # init board
        board = game.Board(p_boardSizeX, p_boardSizeY)

        # set opponent
        if opponentConstr is None:
            if selfPlayFixOpp:
                # self-play against a frozen copy of the current model
                oppModel = clone_model(model)
                oppModel.set_weights(model.get_weights())
                board.setPlayers(
                    game.AITrainingReduFeatWOPlayer("1", board, model, howFar),
                    game.AITrainingReduFeatWOPlayer("2", board, oppModel,
                                                    howFar))
            else:
                board.setPlayers(
                    game.AITrainingReduFeatWOPlayer("1", board, model, howFar),
                    game.AITrainingReduFeatWOPlayer("2", board, model, howFar))
        else:
            try:
                opp = opponentConstr("2", board, opponentStrength)
            except TypeError:
                opp = opponentConstr("2")
            board.setPlayers(
                game.AITrainingReduFeatWOPlayer("1", board, model, howFar),
                opp)

        # set start strategy
        board.startGameWithPseudoRandomStartPositions()

        while board.checkGameStatus() == 0:
            # Boltzmann action selection: querying the network fills player1.vals
            # with Q-value estimates for the three actions.
            board.player1.getDirection()
            Qprobs = game.softmax(board.player1.vals / temperature)
            action_value = np.random.choice(Qprobs[0], p=Qprobs[0])
            # Map the sampled value back to its index, then shift {0,1,2} to moves {-1,0,1}.
            action = np.argmax(Qprobs[0] == action_value) - 1

            # Take action, observe new state S'
            oldState = board.to01ReducedFeaturesWithOpponent(
                board.player1, board.player2, howFar).reshape(
                    1, 2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)))
            board.movePlayers(action, board.player2.getDirection())
            newState = board.to01ReducedFeaturesWithOpponent(
                board.player1, board.player2, howFar).reshape(
                    1, 2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)))
            gameStatus = board.checkGameStatus()

            # Observe reward
            reward = rewardFunc(gameStatus)

            if len(replay) < buffer:  # if buffer not filled, add to it
                replay.append((oldState, action, reward, gameStatus, newState))
            else:
                if h < (buffer - 1):
                    h += 1
                else:
                    h = 0
                replay[h] = (oldState, action, reward, gameStatus, newState)

                # randomly sample our experience replay memory
                minibatch = random.sample(replay, batchSize)
                X_train = []
                y_train = []

                # fixed target model, refreshed every fixedTargetSteps updates
                if targetModel is None:
                    targetModel = clone_model(model)
                    targetModel.set_weights(model.get_weights())
                elif updateStep % fixedTargetSteps == 0:
                    targetModel = clone_model(model)
                    targetModel.set_weights(model.get_weights())

                for memory in minibatch:
                    oldState, action, reward, gameStatus, newState = memory
                    oldQvals = model.predict(oldState, batch_size=1)
                    newQvals = targetModel.predict(newState, batch_size=1)
                    maxQval = np.max(newQvals)
                    y = np.zeros((1, 3))
                    y[:] = oldQvals[:]
                    if gameStatus == 0:  # non-terminal state
                        update = (reward + (gamma * maxQval))
                    else:  # terminal state
                        update = reward
                    y[0][action + 1] = update  # action + 1 because actions are -1,0,1
                    X_train.append(
                        oldState.reshape(
                            2 * (4 + (2 * howFar + 1) * (2 * howFar + 1)), ))
                    y_train.append(y.reshape(3, ))
                X_train = np.array(X_train)
                y_train = np.array(y_train)

                print("Game #: %s" % (i, ))
                model.fit(X_train,
                          y_train,
                          batch_size=batchSize,
                          epochs=1,
                          verbose=1)
                updateStep += 1

        # Periodic checkpoint and temperature decay at the end of each episode.
        if i % 10000 == 0:
            model.save(savePath)
        if temperature > 1.0:
            temperature -= (startingTemperature / episodes)
        else:
            temperature = 1.0

    model.save(savePath)