Example #1
File: rl_agent.py Project: yehanz/Qttt_RL
    def greedy_action(self, free_qblock_id_lists, collapsed_qttts, mark):
        assert len(collapsed_qttts) > 0
        states = {}
        for i in range(len(collapsed_qttts)):
            if free_qblock_id_lists[i] is None:
                nstate = after_action_state(collapsed_qttts[i], None, mark)
                states[(i, -1, -1)] = GameTree.get_state_val(nstate)
                continue
            n = len(free_qblock_id_lists[i])
            for j in range(n - 1):
                for k in range(j + 1, n):
                    loc1 = free_qblock_id_lists[i][j]
                    loc2 = free_qblock_id_lists[i][k]
                    nstate = after_action_state(collapsed_qttts[i],
                                                (loc1, loc2), mark)
                    states[(i, loc1, loc2)] = GameTree.get_state_val(nstate)
        if mark % 2 == 1:
            indices = GameTree.best_states(states, min)
        else:
            indices = GameTree.best_states(states, max)

        i, j, k = random.choice(indices)

        action = (collapsed_qttts[i], (j, k))
        return action
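Note: the nested j/k loops above enumerate every unordered pair of free block IDs for each collapsed qttt. A minimal sketch of the same enumeration using itertools.combinations (the ID lists below are made-up placeholders, not data from the project):

# Illustrative sketch only: the pair enumeration greedy_action performs,
# expressed with itertools.combinations. The toy IDs are placeholders.
from itertools import combinations

free_qblock_id_lists = [[0, 3, 5], None]  # None marks a board with no free pair left

for i, free_ids in enumerate(free_qblock_id_lists):
    if free_ids is None:
        print(i, 'collapsed board evaluated as-is')
        continue
    for loc1, loc2 in combinations(free_ids, 2):
        print(i, (loc1, loc2))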
Example #2
File: rl_agent.py Project: yehanz/Qttt_RL
    def _learn(self, max_episode, save_as_file='TD_policy.dat'):
        env = Env()
        agents = [
            TD_agent(self.epsilon, self.alpha, self.decay_rate),
            TD_agent(self.epsilon, self.alpha, self.decay_rate)
        ]

        for _ in tqdm(range(max_episode)):
            # reset to the initial state; env keeps a counter for the current round
            # odd round -> X, even round -> O, because each piece carries a submark
            env.reset()
            for agent in agents:
                agent.decay_epsilon()

            while True:
                curr_qttt, mark = env.get_state()

                agent = ProgramDriver.get_agent_by_mark(agents, mark)

                free_qblock_id_lists, collapsed_qttts = env.get_valid_moves()

                collapsed_qttt, agent_move = agent.act(free_qblock_id_lists,
                                                       collapsed_qttts, mark)

                next_qttt, next_round, reward, done = env.step(
                    collapsed_qttt, agent_move, mark)

                agent.bellman_backup(curr_qttt, next_qttt, reward, mark)

                if done:
                    GameTree.set_state_value(next_qttt.get_state(), reward)
                    break

        ProgramDriver.save_model(save_as_file, max_episode, self.epsilon,
                                 self.alpha, self.decay_rate)
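Example #2 builds two TD_agent instances from (epsilon, alpha, decay_rate) and calls decay_epsilon() once per episode. TD_agent itself is not shown in this listing; the sketch below is only a guess at what a multiplicative epsilon decay could look like with those constructor arguments:

# Hypothetical sketch; the real TD_agent.decay_epsilon in yehanz/Qttt_RL is not
# shown in this listing, so this only illustrates one common way to decay epsilon.
class TDAgentSketch:
    def __init__(self, epsilon, alpha, decay_rate):
        self.epsilon = epsilon        # exploration probability
        self.alpha = alpha            # learning rate
        self.decay_rate = decay_rate  # multiplicative decay per episode

    def decay_epsilon(self):
        # shrink exploration a little after every training episode
        self.epsilon *= self.decay_rate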
Example #3
    def __init__(self, initialState):

        # initialize and construct GameTree for AI
        self.gt = GameTree(initialState)
        self.gt.expand()  # only happens once during initialization

        # keep track of current state
        self.currState = initialState
Example #4
File: rl_agent.py Project: yehanz/Qttt_RL
 def load_model(filename):
     with open(filename, 'rb') as f:
         # read model info
         info = json.loads(f.readline().decode('ascii'))
         for line in f:
             elms = line.decode('ascii').split('\t')
             state = eval(elms[0])
             val = eval(elms[1])
             vcnt = eval(elms[2])
             GameTree.load_state(state, val, vcnt)
     return info
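load_model reads the format written by save_model later in this listing: one JSON info line followed by tab-separated state, value and visit-count fields. Since those fields are plain literals, ast.literal_eval can stand in for eval; a variant sketch (not the project's code), assuming the states are stored as Python literals:

# Variant sketch of the same parsing with ast.literal_eval instead of eval;
# assumes each stored state is a plain Python literal, as produced by save_model.
import ast
import json

def load_model_literal(filename):
    with open(filename, 'rb') as f:
        info = json.loads(f.readline().decode('ascii'))  # model info header
        for line in f:
            state_s, val_s, vcnt_s = line.decode('ascii').rstrip('\n').split('\t')
            GameTree.load_state(ast.literal_eval(state_s), float(val_s), int(vcnt_s))
    return info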
Example #5
File: rl_agent.py Project: yehanz/Qttt_RL
 def bellman_backup(self, qttt, next_qttt, reward, mark):
     """
     Bellman backup for TD learning
     :param Qttt qttt: current state of qttt
     :param Qttt next_qttt: next state after the action is taken
     :param int reward: immediate reward for this round
     :param int mark: mark of the acting agent
     :return: None
     """
     state_value = GameTree.get_state_val(qttt.get_state())
     next_state_value = GameTree.get_state_val(next_qttt.get_state())
     updated_state_value = state_value + self.alpha * (
         reward + gamma * next_state_value - state_value)
     GameTree.set_state_value(qttt.get_state(), updated_state_value)
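The update above is the standard TD(0) rule, V(s) <- V(s) + alpha * (reward + gamma * V(s') - V(s)), where V is the state value kept in GameTree, alpha is the learning rate passed to the agent, and gamma is assumed to be a module-level discount constant, since it is not defined inside this snippet.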
Example #6
    def dfs(self):
        #print "In dfs() of Play Class"

        #root node of type GameTree
        root = GameTree(self.startBoard)

        #expand the boards reachable from the root board
        possible_boards = root.getGameBoard().possibleBoards()

        #print "Possible Board Values in Play Class::",possible_boards;
        for nextBoard in possible_boards:
            nextNode = GameTree(nextBoard)
            #print "NextBoard Value in dfs() of Play Class::",nextBoard;
            if self.play(nextBoard, nextNode):
                root.addChild(nextNode)
Example #7
    def astar(self, heuristic_val):
        #print "In dfs() of Play Class"

        #root node of type GameTree
        root = GameTree(self.startBoard)

        #Expand Boards Based On The Heuristics
        possible_boards = root.getGameBoard().possibleBoards()

        #print "Possible Board Values in Play Class::",possible_boards;
        for nextBoard in possible_boards:
            nextNode = GameTree(nextBoard)
            #print "NextBoard Value in dfs() of Play Class::",nextBoard;
            if self.play(nextBoard, nextNode):
                root.addChild(nextNode)
Example #8
class Game:
    def __init__(self, initialState):

        # initialize and construct GameTree for AI
        self.gt = GameTree(initialState)
        self.gt.expand()  # only happens once during initialization

        # keep track of current state
        self.currState = initialState

    def playOutGame(self, strat1, strat2):  # returns path to get to end
        # play to end
        path = []
        turnIter = 100
        i = 1
        while not self.currState.isTerminal():
            if i % turnIter == 0:
                print('# game at {} turns'.format(i))
            path.append(self.currState)
            if self.currState.turn == 1:
                self.currState = strat1.calcNextMove(self)
            else:
                self.currState = strat2.calcNextMove(self)
            i += 1
        path.append(self.currState)
        return path

    def playPlayerGame(self, strat):
        while not self.currState.isTerminal():
            print("GameTree length: {}".format(len(self.gt.getAllNodes())))
            print(self.currState, '\n')

            # player's turn
            if self.currState.turn == 1:
                playerMove = input("You're up!\n")
                if playerMove[0] == 's':  # split move
                    self.currState = self.currState.splitMove(
                        int(playerMove[6]), int(playerMove[8]))
                else:
                    self.currState = self.currState.strikeMove(
                        int(playerMove[0]), int(playerMove[2]))
            # computer's turn
            else:
                print("My turn!")
                self.currState = strat.calcNextMove(self)

        print(self.currState)
        print("Player {} wins!".format(self.currState.nextTurn()))
Example #9
    def AI_Move(self):
        start = time.time()
        self.winTime = 0
        curBoard = copy.deepcopy(self.gb.board)
        curList = self.gb.card_list.copy()
        #         print('preCard is: '+self.preCard)
        root = GameTree(curBoard, '0000', curList, self.preCard)
        curStep = 40 - (self.count_dict['color'] + self.count_dict['dot'] +
                        self.step_dict['color'] + self.step_dict['dot'])

        minimax = MiniMax(curBoard, curList, curStep, self.current_turn,
                          self.isPuring, self.var_heuristic.get())

        minimax.generateTree(1, root, curStep)

        #sTime=time.time()
        command = minimax.miniMaxi(root).id
        #eTime=time.time()
        #print('minimax use time: '+str(eTime-sTime)+' (s)')
        if self.trace.get() == 1:
            minimax.writeFile()

        self.var_command.set(command)
        self.isAI = True
        self.button_move()
        self.isAI = False
        if self.winTime != 0:
            end = self.winTime

        else:
            end = time.time()
        self.lable_time['text'] = 'AI using time: ' + "{:.2f}".format(end -
                                                                      start)
        self.winTime = 0
Example #10
File: rl_agent.py Project: yehanz/Qttt_RL
    def play_with_human(self, save_as_file='TD_human_policy.dat'):
        ProgramDriver.load_model(save_as_file)
        env = Env()
        agents = [
            TD_agent(self.epsilon, self.alpha, self.decay_rate),
            HumanAgent(1),
        ]

        while True:
            env.reset()
            td_agent = agents[0]
            td_agent.decay_epsilon()
            env.render()

            while True:
                curr_qttt, mark = env.get_state()

                agent = ProgramDriver.get_agent_by_mark(agents, mark)

                free_qblock_id_lists, collapsed_qttts = env.get_valid_moves()

                collapsed_qttt, agent_move = agent.act(free_qblock_id_lists,
                                                       collapsed_qttts, mark)

                if collapsed_qttt is None:
                    ProgramDriver.save_model(save_as_file, 0, self.epsilon,
                                             self.alpha, self.decay_rate)
                    print("Model saved.")
                    sys.exit()

                next_qttt, next_round, reward, done = env.step(
                    collapsed_qttt, agent_move, mark)

                print('')
                env.render()

                td_agent.bellman_backup(curr_qttt, next_qttt, reward, mark)

                if done:
                    GameTree.set_state_value(next_qttt.get_state(), reward)
                    next_qttt.show_result()
                    break
Example #11
 def __init__(self, manticore=None):
     # user_account = m.create_account(balance=1000)
     # user_sol_account = m.solidity_create_contract(contract_src, owner=user_account)
     self.m = manticore
     self.contract_account = self.m.create_account(balance=1000)
     self.malicious_account = self.m.create_account(balance=1000)
     self.contract_sol_account = self.m.solidity_create_contract(contract_src, owner=self.contract_account)
     self.contract_sol_account._EVMContract__init_hashes()
     self.root = GameTree(self.m,self.contract_sol_account)
     self.symbolic_vars = {}
     self.z3_var_counter = 0
     self.z3_func = {}
     self.lp = LtlParser()
Example #12
 def iddfs(self):
     root = GameTree(self.startBoard)
     depth_val = 0
     ret_val = False
     #every time initial configuration and hence infinite loop
     #possible_boards=self.startBoard.possibleBoards();
     #false value so run again
     #true value so stop
     while (not (ret_val)):
         #print "In while loop with depth::",depth_val
         ret_val = self.depth_ls(root, depth_val, self.startBoard)
         #Increase depth after each iteration
         depth_val = depth_val + 1
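iddfs repeatedly runs a depth-limited search (depth_ls, shown later in this listing) with an increasing depth bound until it reports success. A generic, self-contained sketch of that pattern, with placeholder callables standing in for the project's GameBoard API:

# Generic iterative-deepening sketch; 'children' and 'is_goal' are placeholders
# for GameBoard.possibleBoards() and GameBoard.finalBoard().
def depth_limited(node, depth, children, is_goal):
    if is_goal(node):
        return True
    if depth == 0:
        return False
    return any(depth_limited(child, depth - 1, children, is_goal)
               for child in children(node))

def iterative_deepening(start, children, is_goal, max_depth=50):
    for depth in range(max_depth + 1):
        if depth_limited(start, depth, children, is_goal):
            return depth  # depth bound at which a goal board was first found
    return None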
Example #13
File: rl_agent.py Project: yehanz/Qttt_RL
 def save_model(save_file, max_episode, epsilon, alpha, decay_rate):
     with open(save_file, 'wt') as f:
         # write model info
         info = dict(type="td",
                     max_episode=max_episode,
                     epsilon=epsilon,
                     alpha=alpha,
                     decay_rate=decay_rate)
         # write state values
         f.write('{}\n'.format(json.dumps(info)))
         for state, value in GameTree.state_val.items():
             # if value != 0:
             vcnt = GameTree.get_state_cnt(state)
             f.write('{}\t{:0.3f}\t{}\n'.format(state, value, vcnt))
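save_model and load_model (Example #4) are two halves of the same on-disk format: one JSON info line, then one "state<TAB>value<TAB>visit count" line per stored state. A minimal round-trip sketch, assuming both are exposed as static methods on ProgramDriver as the other examples call them; the numeric arguments are placeholders, not the project's defaults:

# Placeholder hyperparameters, used only to illustrate the call signatures.
ProgramDriver.save_model('TD_policy.dat', max_episode=10000,
                         epsilon=0.1, alpha=0.3, decay_rate=0.9)
info = ProgramDriver.load_model('TD_policy.dat')
print(info['type'], info['max_episode'])  # keys written by save_model's info dict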
Example #14
 def play(self, gb, parent):
     node_counter = 0
     if (gb.finalBoard()):
         found = True
     else:
         found = False
         nextBoards = gb.possibleBoards()
         #print "NextBoard Values in Play Class::",nextBoards;
         for nextBoard in nextBoards:
             nextNode = GameTree(nextBoard)
             if (self.play(nextBoard, nextNode)):
                 node_counter = node_counter + 1
                 parent.addChild(nextNode)
     print "Number Of Nodes Expanded::", node_counter
     return found
Example #15
    def dfs(self):
        #print "In dfs() of Play Class"

        #root node of type GameTree
        root = GameTree(self.startBoard)

        #
        possible_boards = root.getGameBoard().possibleBoards()

        #print "Possible Board Values in Play Class::",possible_boards;
        for nextBoard in possible_boards:
            nextNode = GameTree(nextBoard)
            #print "NextBoard Value in dfs() of Play Class::",nextBoard;
            if (self.play(nextBoard, nextNode)):
                root.addChild(nextNode)
Example #16
    def astar(self, heuristic_val):
        #print "In dfs() of Play Class"

        #root node of type GameTree
        root = GameTree(self.startBoard)

        #Expand Boards Based On The Heuristics
        possible_boards = root.getGameBoard().possibleBoards()

        #print "Possible Board Values in Play Class::",possible_boards;
        for nextBoard in possible_boards:
            nextNode = GameTree(nextBoard)
            #print "NextBoard Value in dfs() of Play Class::",nextBoard;
            if (self.play(nextBoard, nextNode)):
                root.addChild(nextNode)
Example #17
    def depth_ls(self, parent, depth_val, gb):
        if (depth_val >= 0):
            #print "depth_val>=0::",depth_val>=0
            nextBoards = gb.possibleBoards()
            #print "nextBoard::",nextBoards

            for nextBoard in nextBoards:

                nextNode = GameTree(nextBoard)

                if (self.depth_ls(nextNode, depth_val - 1, nextBoard)):
                    parent.addChild(nextNode)

                #early detection for final board
                if (nextBoard.finalBoard()):
                    return True
        else:
            #print "depth_val>=0",depth_val>=0
            #invalid depth lookup so return
            #print "Accessing nodes at invalid depth so returing::"
            return False
Example #18
from MetaGame import MetaGame
from GameTree import GameTree
import sys

try:
    root = int(sys.argv[1])
    tree = GameTree(MetaGame(root), 2)
    print(tree.root().best_move(1000).get_meta_int())
except ValueError:
    mode = sys.argv[1]
    if mode == 'p':
        root = int(sys.argv[2])
        print(MetaGame(root))
    elif mode == 'a':
        depth = int(sys.argv[2])
        root = int(sys.argv[3])
        print(GameTree(MetaGame(root), depth + 1))
Example #19
    from .TableManager import Table
    from .GameTree import GameTree, GameState
    from .GameTable import GameTable
    from .PrePostSim import PreSim, PostSim
    from .PickPocket.MoveGenerator.Evaluate import EvalNode
    from .PickPocket.utils.StopWatch import StopWatch

import Ipc.commands as commands
import json
import time

LookAheadDepth = 3
pipeline = PipeLine()
pipeline.open()
pipeline.WaitForArrival()
gametree = GameTree()
gametable = GameTable()
PreSimTask = PreSim(gametable=gametable)
VectorMath_StopWatch = StopWatch()


def RecvGreetings():
    for msg in pipeline.recvr.RecvAll():
        print(msg)


def CreatePreSimTask(move, table):
    def presim_task():
        gametable.ReserveTable(move)
        cmd = commands.EncodeSGState(table, move)
        pipeline.sender.Send(cmd)