def getMove(self, state):
    """Return a move for the current player chosen from the neural
    network's output scores, restricted to legal moves.

    The board is rotated so the network always sees the position from
    player 0's perspective; the chosen move is rotated back before it
    is returned.
    """
    # rotate the board for current player
    player = s.getCurrentPlayer(state)
    if player != 0:
        flip = True
        board = s.flipBoard(state)
    else:
        flip = False
        board = state

    # get output of neural network
    fd = {self.x: [self.makeInputVector(board[:14])], self.keep_prob: 1.0}
    y = self.sess.run(self.y, fd)
    # y is a list containing a single output vector
    # y == [[0.0108906 0.1377293 0.370027 0.2287382 0.0950692 0.1575449]]
    scores = list(y[0])

    # flag a degenerate network BEFORE the scores are used to pick a
    # move (the original logged only after the choice had been made)
    if any(math.isnan(x) for x in scores):
        logger.error(self.name + " returned NaN!")

    # we only want to pick from legal moves (the nn will learn these
    # eventually, but we're helping him with this constraint)
    legalMoves = s.getLegalMoves(board)
    move = self.chooseMoveRandomly(scores, legalMoves)

    # if we rotated the board before, rotate it back
    if flip:
        move = s.flipMove(move, player)
    return move
def play_one_game(players, lucky):
    """Play one full game between `players` and return (winner, trainingset).

    Each move is recorded as [flipped board, flipped move, player] so the
    training data is always expressed from player 0's perspective.
    `lucky` supplies exploratory random moves early on (see needRandomMove).
    """
    game = s.init()
    done = False
    moves = []
    while not done:
        # do move for someone
        player = s.getCurrentPlayer(game)
        if needRandomMove(len(moves)):
            move = lucky.move(game)
        else:
            move = players[player]['ai'].move(game)
        if move is None:
            # use a %s placeholder: passing `game` as a bare extra
            # argument with no placeholder makes logging raise a
            # formatting error instead of printing the message
            logger.error("null move! %s", game)
        mt = [s.flipBoardCurrentPlayer(game), s.flipMove(move, player), player]
        moves.append(mt)
        game = s.doMove(game, move)
        done = s.isGameOver(game)

    winner = s.getWinner(game)
    score = s.getScore(game)

    # make training set with move, gamestate, and 1 for win, 0 for lose
    trainingset = [
        d[0:2] + [int(winner == d[2])] + list(score)[::1 - d[2] * 2]
        for d in moves
    ]
    for move in trainingset:
        results.info(move)

    # notify each AI of the outcome and tally wins
    for i, p in enumerate(players):
        isWinner = 1 if i == winner else 0
        p['ai'].gameOver(isWinner)
        p['wins'] += isWinner

    return (winner, trainingset)
def play_game(players):
    """Play one full game between `players` and return (winner, trainingset).

    Each recorded move is a dict holding the flipped move and board, the
    player who moved, and the name of the AI module that produced it.
    """
    game = s.init()
    done = False
    moves = []
    while not done:
        # do move for someone
        player = s.getCurrentPlayer(game)
        move = players[player]['ai'].move(game)
        if move is None:
            print("null move! ", game)
        mt = {
            "move": s.flipMove(move, player),
            "board": s.flipBoardCurrentPlayer(game),
            "player": player,
            "name": players[player]['module'].__name__
        }
        moves.append(mt)
        game = s.doMove(game, move)
        done = s.isGameOver(game)

    winner = s.getWinner(game)

    # make training set with move, gamestate, and 1 for win, 0 for lose
    trainingset = [dict(d, winner=int(winner == d['player'])) for d in moves]

    # tell each AI whether it won (enumerate instead of a manual counter)
    for i, p in enumerate(players):
        p['ai'].gameOver(i == winner)

    return (winner, trainingset)
def move(self, state):
    """Delegate move selection to the wrapped network, normalizing the
    board so the network always plays from player 0's perspective."""
    player = s.getCurrentPlayer(state)
    flip = player != 0
    board = s.flipBoard(state) if flip else state

    chosen = self.nn.getMove(board)

    # undo the rotation so the move applies to the original state
    return s.flipMove(chosen, player) if flip else chosen
def move(self, state):
    """Pick the highest-scoring move from moveToVector's output.

    The board is rotated so scoring is always done from player 0's
    perspective, and the chosen move is rotated back before returning.
    """
    # rotate the board for current player
    player = s.getCurrentPlayer(state)
    flip = player != 0
    board = s.flipBoard(state) if flip else state

    # scan moves 5 down to 0 with a strict > comparison, so ties between
    # equal scores keep the higher-numbered move; scores must exceed 0
    # for a move to be selected at all (best_move stays None otherwise)
    scores = moveToVector(board)
    best_move = None
    best_score = 0
    for candidate in range(5, -1, -1):
        if scores[candidate] > best_score:
            best_move = candidate
            best_score = scores[candidate]

    # flip move back if we rotated the board
    if flip:
        best_move = s.flipMove(best_move, player)
    return best_move