Example #1
def getNewPieces(pieces, playerMove):
    
    ## Get XY position of the move. TicTacToeState uses (x, y) coordinates.
    def getXY(index):
        return [index // 3, index % 3]
    ## Build Board
    board = [[None]*3 for j in range(3)]
    for i, piece in enumerate(pieces):
        row, col = getXY(i)
        board[row][col] = piece
        
    ## Player Move
    action = getXY(playerMove)
    gameState = TicTacToeState(board,'x')
    gameState = gameState.generateSuccessor(action)
    if None in pieces:
        ## Computer Move
        player = MinimaxAgent()
        action = player.getAction(gameState)
        if action is not None:
            gameState = gameState.generateSuccessor(action)
    return gameState.board[0] + gameState.board[1] + gameState.board[2]
Example #2
def evaluate(game):
    # Replay a recorded game and collect (featurized state, move-probability)
    # training pairs from the agent's move distribution at each position.
    s = State()
    a = MinimaxAgent(max_depth=6, max_width=6)
    ss = []
    pp = []
    for x, y in game:
        d = a._get_dist(s)
        if len(d) != 1 or (d[0][0] >= 0 and d[0][1] >= 0):
            ss.append(s.featurize())
            pp.append(util.dist_to_prob(d))
        s.move(x, y)
    sys.stdout.write("=")  # progress marker
    sys.stdout.flush()
    return (np.array(ss), np.array(pp))
Example #3
def __init__(self, master):
    tk.Frame.__init__(self, master)
    self.button = list()
    self.frames = list()
    self.state = State()
    root = path.join(path.dirname(__file__), "img")
    self.image = [
        tk.PhotoImage(file=path.join(root, "empty.gif")),
        tk.PhotoImage(file=path.join(root, "naught.gif")),
        tk.PhotoImage(file=path.join(root, "cross.gif")),
    ]
    self.agent = MinimaxAgent()
    self.last = None
    self.pack()
    self.create_widgets()
    self.recommend()
Example #4
class Application(tk.Frame):
    def __init__(self, master):
        tk.Frame.__init__(self, master)
        self.button = list()
        self.frames = list()
        self.state = State()
        root = path.join(path.dirname(__file__), "img")
        self.image = [
          tk.PhotoImage(file=path.join(root, "empty.gif")),
          tk.PhotoImage(file=path.join(root, "naught.gif")),
          tk.PhotoImage(file=path.join(root, "cross.gif")),
        ]
        self.agent = MinimaxAgent()
        self.last = None
        self.pack()
        self.create_widgets()
        self.recommend()

    def recommend(self):
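        # Ask the agent to score candidate moves, restore the images of the
        # previously annotated cells, then overlay the new scores on the board.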
        t = time()
        actions = self.agent.get_score(self.state)
        print("time elapsed: %f seconds" % (time() - t))
        if self.last is not None:
            for x, y, _ in self.last:
                button = self.button[np.ravel_multi_index((x, y), dims=(15, 15))]
                button.config(image=self.image[self.state.board[x, y]])
        for x, y, v in actions:
            button = self.button[np.ravel_multi_index((x, y), dims=(15, 15))]
            button.config(image="", text="%.02f" % v)
        self.last = actions

    def highlight(self, x, y):
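        # Mark the cells of the line reported by state.highlight(x, y) in blue.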
        for i, j in self.state.highlight(x, y):
            self.frames[np.ravel_multi_index((i, j), dims=(15, 15))].config(padx=1, pady=1, bg="blue")

    def click(self, i, j):
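        # Build an event handler bound to board cell (i, j).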
        def respond(e):
            if not self.state.end and self.state.board[i, j] == 0:
                self.button[np.ravel_multi_index((i, j), dims=(15, 15))].config(image=self.image[self.state.player])
                self.state.move(i, j)
                if self.state.end:
                    if self.state.features["win-o"] + self.state.features["win-x"] > 0:
                        self.highlight(i, j)
                    else:
                        self.frames[np.ravel_multi_index((i, j), dims=(15, 15))].config(padx=1, pady=1, bg="red")
                else:
                    self.recommend()
        return respond

    def create_widgets(self):
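        # Lay out the 15x15 board: a fixed-size frame per cell, each holding a
        # clickable label that displays the cell's current image.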
        for i in range(15):
            for j in range(15):
                f = tk.Frame(self, height=50, width=50)
                f.pack_propagate(0)
                f.grid(row=i, column=j, padx=0, pady=0)
                self.frames.append(f)
                b = tk.Label(f, image=self.image[0], bg="yellow")
                b.pack(fill=tk.BOTH, expand=1)
                b.bind("<Button-1>", self.click(i, j))
                self.button.append(b)
Example #5
else:
    print('hell no')


#test getLegalActions
print(state.getLegalActions(1))

#test generateSuccessor
newState = state.generateSuccessor(1, 1, 'switch')
print(newState.currAgent)

newState = state.generateSuccessor(1, 'thunderbolt', 'moves')
print(newState.opp)
'''
#test minimax agent
alg = MinimaxAgent(3)
'''
action, movType = alg.getAction(state)
print('my action: ', action, movType)
state = state.generateSuccessor(1, action, movType)

enemyaction, movType = alg.getEnemyAction(state)
print('opp action: ', enemyaction, movType)
state = state.generateSuccessor(-1, enemyaction, movType)
'''
while not state.isEnd():
    action, movType = alg.getAction(state)
    state = state.generateSuccessor(1, action, movType)
    print(action, movType)
    if state.isEnd():
        break
Example #6
        if gui_active:     
            win.updateSprites(state)
            win.refresh()
        
    if verbose > 0:
        state.printGrid(game.grid_size)

    return state

if __name__ == "__main__":
    if len(sys.argv) > 1:
        max_iter = int(sys.argv[1])
    else:
        max_iter = None


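    # Candidate search agents; only alphabeta_agent is used in the strategy
    # list below.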
    minimax_agent = MinimaxAgent(depth=lambda s, a: 2)
    alphabeta_agent = AlphaBetaAgent(depth=lambda s, a: survivorDfunc(s, a, 4, 0.5), evalFn=greedyEvaluationFunction)
    expectimax_agent = ExpectimaxAgent(depth=lambda s, a: cowardCenterDepthFunction(s, a, 2), evalFn=greedyEvaluationFunction)
    
    strategies = [smartGreedyStrategy, opportunistStrategy, alphabeta_agent.getAction]

    # add a human player
    # strategies = [humanStrategy, smartGreedyStrategy, opportunistStrategy, alphabeta_agent.getAction]

    # add an RL agent
    featureExtractor = FeatureExtractor(len(strategies), grid_size=20, radius_=10)
    rlStrategy = load_rl_strategy("nn-nn1-r10-1b.p", strategies, featureExtractor, discount=0.9, q_type="nn")
    strategies.append(rlStrategy)

    controller(strategies, 20, max_iter=max_iter, gui_active=True, verbose=0, game_speed=10)
Example #7
""" battle arena between agents """

import argparse
import numpy as np
import tensorflow as tf
from time import time
from state import State
from minimax import MinimaxAgent
from mcts_agent import MCTSAgent

NUM_GAMES = 2

with tf.Session() as sess:
    mcts = MCTSAgent(sess, "dualsup", chkpnt=3000)
    agent = MinimaxAgent()
    print("ARENA: %s-%d VERSES %s-%d" %
          (mcts.model_name, mcts.chkpnt, "minimax", 0))

    stat = np.zeros(shape=(2, 2), dtype=int)
    for i in range(NUM_GAMES):
        t = time()
        s = State()
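        # Alternate colors: the MCTS agent plays black in even-numbered games.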
        a_is_black = (i % 2 == 0)
        while not s.end and len(s.history) < 225:
            if a_is_black == (s.player > 0):
                s.move(*mcts.get_action(s, deterministic=True))
                mcts.update(s)
            else:
                s.move(*agent.get_action(s))
                mcts.update(s)
        mcts.refresh()
Example #8
from os import path, listdir
from sys import stdout, argv
from time import time
from itertools import count
from state import State
from minimax import MinimaxAgent

if __name__ == '__main__':
    if len(argv) != 3:
        print("Usage: python battle_minimax.py [total] [multiple]")
    else:
        total = int(argv[1])
        rest = int(argv[2])
        latest = -1
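        # Resume numbering from the most recent saved game record.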
        for f in listdir(path.join(path.dirname(__file__), "data", "minimax")):
            if f.endswith(".pkl"):
                latest = max(latest, int(f.split(".")[0]))

        agent = MinimaxAgent(max_depth=6, max_width=8)
        names = ["draw", "black", "white"]
        for i in count(latest + 1):
            if i % total == rest:
                print("[INFO] game %d begin" % i)
                begin = time()
                state = State()
                while len(state.history) != 225 and not state.end:
                    x, y = agent.get_action(state)
                    state.move(x, y)
                    stdout.write(".")
                    stdout.flush()
                winner = state.player if state.end else 0
                with open(
                        path.join(path.dirname(__file__), "data", "minimax",
                                  "%d.pkl" % i), "wb") as out:
Example #9
                                    "x:0": board
                                }).reshape(225)
                y = np.exp(y)
                y = y / y.sum()
                self.dist_queue.put(y)

    def get_action(self, state):
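        # Open on the center square; otherwise sample a move from the
        # network's probability distribution over the 15x15 board.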
        if len(state.history) == 0:
            return (7, 7)
        self.state_queue.put(state)
        prob = self.dist_queue.get()
        return np.unravel_index(np.random.choice(225, p=prob), (15, 15))


agents = {
    "minimax": lambda which: MinimaxAgent(max_depth=6, max_width=8),
    "monet": lambda which: NetAgent(which),
}
players = [0, agents[argv[1]]("black"), agents[argv[2]]("white")]

names = ["", "black", "white"]
state = State()
while len(state.history) != 225 and not state.end:
    t = time()
    x, y = players[state.player].get_action(state)
    print("%s [%g seconds]" % (names[state.player], time() - t))
    state.move(x, y)
winner = state.player if state.end else 0
with open(
        path.join(path.dirname(__file__), "data",
                  "battle-%s-%s.pkl" % (argv[1], argv[2])), "wb") as out:
Example #10
    def create_widgets(self):
        for i in range(15):
            for j in range(15):
                f = tk.Frame(self, height=50, width=50)
                f.pack_propagate(0)
                f.grid(row=i, column=j, padx=0, pady=0)
                self.frames.append(f)
                b = tk.Label(f, image=self.image[0], bg="yellow")
                b.pack(fill=tk.BOTH, expand=1)
                b.bind("<Button-1>", self.click(i, j))
                self.button.append(b)

root = tk.Tk()
root.wm_title("Alpha Gomoku")
root.attributes("-topmost", True)

with tf.Session() as sess:
    parser = argparse.ArgumentParser()
    parser.add_argument("model_name", type=str)
    parser.add_argument("--chkpnt", "-c", type=int)
    parser.add_argument("--ensemble", "-e", action="store_true")
    args = parser.parse_args()
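    # Choose the agent implementation from the requested model name.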
    if args.model_name == "minimax":
        agent = MinimaxAgent(max_depth=6, max_width=6)
    elif args.model_name == "mininet":
        agent = MCTSMinimaxAgent(sess, "supervised", chkpnt=args.chkpnt)
    else:
        agent = Agent(sess, args.model_name, chkpnt=args.chkpnt)
    app = Application(agent, root, ensemble=args.ensemble)
    app.mainloop()