def run(self):
    """Play games back-to-back for as long as ``self.playerIds`` is truthy.

    Each round increments ``self.game_count``, prints a "Game N" header,
    runs a fresh ``GameSession`` built from the current player ids and
    ``self.dice_count``, and then calls ``self.read_players()``.

    NOTE(review): the original indentation was lost; ``read_players()`` is
    assumed to belong to the loop body (presumably it refreshes
    ``self.playerIds`` for the next round) -- confirm.
    """
    while self.playerIds:
        self.game_count += 1
        print(f"Game {self.game_count}")
        GameSession(self.playerIds, self.dice_count).run()
        self.read_players()
def addSession(self, owner, ownerId, gamename):
    """Register a new game session unless ``gamename`` is already taken.

    Args:
        owner: Passed to ``GameSession`` and stored in the registry entry.
        ownerId: Passed to ``GameSession``.
        gamename: Name checked with ``self.gameExist`` and stored in the
            registry entry.

    Returns:
        ``True`` when a session was created and appended to
        ``self.sessionArr`` as ``[owner, gamename, session]``; ``False``
        when ``self.gameExist(gamename)`` reported an existing game.
    """
    if self.gameExist(gamename):
        # Name already in use: leave the registry untouched.
        return False

    session = GameSession(owner, ownerId, gamename)
    self.sessionArr.append([owner, gamename, session])
    return True
def runTestPlayRandomGames(numGames, reproSeed):
    """Play games where every move is picked uniformly at random.

    Args:
        numGames: Number of games to play in batch mode (ignored when
            reproducing a single seed).
        reproSeed: When negative, run ``numGames`` games, each with a freshly
            drawn seed. Otherwise replay exactly one game seeded with this
            value, with ``session.game.debug`` enabled and a trace of every
            move printed.
    """

    def chooseRandomMove(session):
        # Query the session for the player to move, its state and the allowed
        # actions, then draw one action from the module-level RNG.
        playerId = session.getCurrentPlayer()
        state = session.getState(playerId)
        allowed = session.getAllowedActions(state)
        return playerId, state, allowed, random.choice(allowed)

    if reproSeed < 0:
        print('Running', numGames, 'random test games...')
        for _ in tqdm(range(numGames)):
            # Draw a seed and apply it to both RNGs; it is stored on the
            # session as ``randomSeed``.
            gameSeed = random.randrange(999999)
            random.seed(gameSeed)
            np.random.seed(gameSeed)

            session = GameSession()
            session.randomSeed = gameSeed
            while not session.isFinished():
                _, _, _, action = chooseRandomMove(session)
                session.step(action)
        return

    # Reproduction mode: one game, fixed seed, verbose output.
    random.seed(reproSeed)
    np.random.seed(reproSeed)

    session = GameSession()
    session.game.debug = True
    session.randomSeed = reproSeed

    print('DEBUG game')
    print(' mods:', [mod.name for mod in session.game.mods])
    print(' blue:', session.game.blueDeck)
    print(' red:', session.game.redDeck)

    while not session.isFinished():
        playerId, state, allowed, action = chooseRandomMove(session)
        # The trace sums state[0:9] as "board" and state[9:14] as "card";
        # the action index is split the same way (mod 9 / div 9).
        boardTotal = sum(state[i] for i in range(9))
        cardTotal = sum(state[i] for i in range(9, 14))
        print(
            'Move, player:%d numActions:%d (board:%d + card:%d) => action:%d (board:%d + card:%d)'
            % (playerId, len(allowed), boardTotal, cardTotal,
               action, action % 9, int(action / 9)))
        session.step(action)
def trainModel():
    """Train a DQN agent by self-play and score it against a random agent.

    Runs 20 epochs. Each epoch plays 200 training games with the DQN agent in
    both seats, calls ``trainingAgent.train()`` once, then plays 1000
    evaluation games against a random agent re-seeded with 0. Afterwards the
    agent is saved to ``data/model.tmp`` and its model code is generated
    into ``data/model.cs``.

    NOTE(review): the original indentation was lost; ``train()`` is assumed
    to run once per epoch after the training games, as the original comment
    ("training update after exploration step") suggests -- confirm.

    Returns:
        A list with one entry per epoch: the object returned by
        ``trainingAgent.getTrainingDetails()`` with a ``'score'`` key added
        (the sum of ``rewards[0]`` over that epoch's evaluation games).
    """
    epochCount = 20
    trainGamesPerEpoch = 200
    evalGamesPerEpoch = 1000

    game = GameSession(useModifiers=True)
    trainingAgent = AgentDQN(game)
    randomAgent = AgentRandom(game)
    # Self-play for learning; DQN versus random for evaluation.
    learningEnv = Environment(game, [trainingAgent, trainingAgent])
    evalEnv = Environment(game, [trainingAgent, randomAgent])

    # Checkpoint loading is currently disabled; to enable it, call
    # trainingAgent.load('data/model.tmp') here.

    print('Start training, epochs:', epochCount)
    startedAt = time.time()
    history = []

    for epochIdx in range(epochCount):
        # Exploration phase: play the training games.
        trainProgress = tqdm(range(trainGamesPerEpoch), leave=False,
                             desc='Training'.ljust(15))
        for _ in trainProgress:
            learningEnv.runTrainingGame()

        # Training update after the exploration phase.
        trainingAgent.train()

        # Evaluate with the same opponent seed on every epoch.
        randomAgent.setSeed(0)
        evalProgress = tqdm(range(evalGamesPerEpoch), leave=False,
                            desc='Evaluating'.ljust(15))
        evalScore = sum(evalEnv.runEvalGame()[0] for _ in evalProgress)

        # Record per-epoch details so training progress can be inspected.
        epochDetails = trainingAgent.getTrainingDetails()
        epochDetails['score'] = evalScore
        history.append(epochDetails)
        print('[%d] %s' % (epochIdx, epochDetails))

    elapsed = time.time() - startedAt
    clock = time.strftime("%H:%M:%Ss", time.gmtime(elapsed))
    print('Done. Total time:%s, epoch avg:%.0fs' % (clock, elapsed / epochCount))

    trainingAgent.save('data/model.tmp')
    trainingAgent.generateModelCode('data/model.cs')
    return history
def runTestAgentEval(numGames, agentName):
    """Play evaluation games for one agent against a random opponent.

    Args:
        numGames: Number of evaluation games to play.
        agentName: ``'dqn'`` selects ``AgentDQN``; any other value keeps the
            default ``AgentRandom``.

    Returns:
        The sum of ``rewards[0]`` over all games played (``0`` when
        ``numGames`` is 0). The original accumulated this value and then
        discarded it; it is now returned so callers can use the result.
    """
    print('Running', numGames, 'eval games for agent', agentName)

    game = GameSession()
    evalAgent = AgentRandom(game)
    if agentName == 'dqn':
        evalAgent = AgentDQN(game)

    # The evaluated agent takes the first seat, so rewards[0] is its reward.
    env = Environment(game, [evalAgent, AgentRandom(game)])

    score = 0
    for _ in tqdm(range(numGames)):
        rewards = env.runEvalGame()
        score += rewards[0]
    return score
""" Example of use of GameSession and Engine to make Stockfish play itself. """ from gameSession import GameSession from engine import Engine stockfishpath = './Stockfish/src/stockfish' eng1 = Engine(stockfishpath) eng2 = Engine(stockfishpath) GameSession(eng1, eng2).play() GameSession(eng1, eng2, time=(300e3, 5e3)).play()