def greedy_action(self, free_qblock_id_lists, collapsed_qttts, mark):
    assert len(collapsed_qttts) > 0
    states = {}
    for i in range(len(collapsed_qttts)):
        if free_qblock_id_lists[i] is None:
            # forced collapse with no free block pair: flag the pair as (-1, -1)
            nstate = after_action_state(collapsed_qttts[i], None, mark)
            states[(i, -1, -1)] = GameTree.get_state_val(nstate)
            continue
        # evaluate the resulting state for every unordered pair of free block ids
        n = len(free_qblock_id_lists[i])
        for j in range(n - 1):
            for k in range(j + 1, n):
                loc1 = free_qblock_id_lists[i][j]
                loc2 = free_qblock_id_lists[i][k]
                nstate = after_action_state(collapsed_qttts[i], (loc1, loc2), mark)
                states[(i, loc1, loc2)] = GameTree.get_state_val(nstate)
    # odd marks pick the minimizing actions, even marks the maximizing ones
    if mark % 2 == 1:
        indices = GameTree.best_states(states, min)
    else:
        indices = GameTree.best_states(states, max)
    # break ties randomly among equally good actions
    i, j, k = random.choice(indices)
    action = (collapsed_qttts[i], (j, k))
    return action

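# The nested j/k loops above enumerate every unordered pair of free block ids for one
# collapsed qttt. A self-contained illustration of the same enumeration with
# itertools.combinations (not taken from the original code; free_ids is hypothetical):
from itertools import combinations

free_ids = [0, 3, 5, 7]                     # hypothetical free block ids
pairs = list(combinations(free_ids, 2))     # all unordered pairs, as in the j/k loops
# pairs == [(0, 3), (0, 5), (0, 7), (3, 5), (3, 7), (5, 7)]
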
def _learn(self, max_episode, save_as_file='TD_policy.dat'):
    env = Env()
    agents = [
        TD_agent(self.epsilon, self.alpha, self.decay_rate),
        TD_agent(self.epsilon, self.alpha, self.decay_rate)
    ]
    for _ in tqdm(range(max_episode)):
        # reset to the initial state; env keeps a counter for the current round
        # (odd round -> x, even round -> o, because each piece carries a sub-mark)
        env.reset()
        for agent in agents:
            agent.decay_epsilon()
        while True:
            curr_qttt, mark = env.get_state()
            agent = ProgramDriver.get_agent_by_mark(agents, mark)
            free_qblock_id_lists, collapsed_qttts = env.get_valid_moves()
            collapsed_qttt, agent_move = agent.act(free_qblock_id_lists, collapsed_qttts, mark)
            next_qttt, next_round, reward, done = env.step(collapsed_qttt, agent_move, mark)
            agent.bellman_backup(curr_qttt, next_qttt, reward, mark)
            if done:
                GameTree.set_state_value(next_qttt.get_state(), reward)
                break
    ProgramDriver.save_model(save_as_file, max_episode, self.epsilon, self.alpha, self.decay_rate)

def __init__(self, initialState):
    # initialize and construct GameTree for AI
    self.gt = GameTree(initialState)
    self.gt.expand()  # only happens once during initialization
    # keep track of current state
    self.currState = initialState

def load_model(filename):
    with open(filename, 'rb') as f:
        # read model info from the JSON header line
        info = json.loads(f.readline().decode('ascii'))
        # each subsequent line: state \t value \t visit count
        for line in f:
            elms = line.decode('ascii').split('\t')
            state = eval(elms[0])
            val = eval(elms[1])
            vcnt = eval(elms[2])
            GameTree.load_state(state, val, vcnt)
    return info

def bellman_backup(self, qttt, next_qttt, reward, mark):
    """
    Bellman backup for TD learning.

    :param Qttt qttt: current state of qttt
    :param Qttt next_qttt: next state after the action is taken
    :param int reward: immediate reward for this round
    :param int mark: mark of the current player
    :return: None
    """
    state_value = GameTree.get_state_val(qttt.get_state())
    next_state_value = GameTree.get_state_val(next_qttt.get_state())
    # gamma is a discount factor resolved at module scope (not shown in this snippet)
    updated_state_value = state_value + self.alpha * (
        reward + gamma * next_state_value - state_value)
    GameTree.set_state_value(qttt.get_state(), updated_state_value)

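# The update above is plain tabular TD(0). As a self-contained illustration (not part of
# the original codebase), the same rule V(s) <- V(s) + alpha * (r + gamma * V(s') - V(s))
# can be sketched with an ordinary dict in place of GameTree's state-value table:
def td0_update(values, state, next_state, reward, alpha=0.1, gamma=0.9):
    """Minimal TD(0) backup on a dict-based value table (illustrative sketch)."""
    v = values.get(state, 0.0)
    v_next = values.get(next_state, 0.0)
    values[state] = v + alpha * (reward + gamma * v_next - v)

# example: a single backup from state 's0' to 's1' with reward 1
# values = {}
# td0_update(values, 's0', 's1', reward=1.0)
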
class Game:
    def __init__(self, initialState):
        # initialize and construct GameTree for AI
        self.gt = GameTree(initialState)
        self.gt.expand()  # only happens once during initialization
        # keep track of current state
        self.currState = initialState

    def playOutGame(self, strat1, strat2):
        # play to the end and return the path of states visited
        path = []
        turnIter = 100
        i = 1
        while not self.currState.isTerminal():
            if i % turnIter == 0:
                print('# game at {} turns'.format(i))
            path.append(self.currState)
            if self.currState.turn == 1:
                self.currState = strat1.calcNextMove(self)
            else:
                self.currState = strat2.calcNextMove(self)
            i += 1
        path.append(self.currState)
        return path

    def playPlayerGame(self, strat):
        while not self.currState.isTerminal():
            print("GameTree length: {}".format(len(self.gt.getAllNodes())))
            print(self.currState, '\n')
            if self.currState.turn == 1:
                # player's turn
                playerMove = input("You're up!\n")
                if playerMove[0] == 's':
                    # split move
                    self.currState = self.currState.splitMove(
                        int(playerMove[6]), int(playerMove[8]))
                else:
                    self.currState = self.currState.strikeMove(
                        int(playerMove[0]), int(playerMove[2]))
            else:
                # computer's turn
                print("My turn!")
                self.currState = strat.calcNextMove(self)
        print(self.currState)
        print("Player {} wins!".format(self.currState.nextTurn()))

def AI_Move(self):
    start = time.time()
    self.winTime = 0
    curBoard = copy.deepcopy(self.gb.board)
    curList = self.gb.card_list.copy()
    # print('preCard is: ' + self.preCard)
    root = GameTree(curBoard, '0000', curList, self.preCard)
    curStep = 40 - (self.count_dict['color'] + self.count_dict['dot'] +
                    self.step_dict['color'] + self.step_dict['dot'])
    minimax = MiniMax(curBoard, curList, curStep, self.current_turn,
                      self.isPuring, self.var_heuristic.get())
    minimax.generateTree(1, root, curStep)
    # sTime = time.time()
    command = minimax.miniMaxi(root).id
    # eTime = time.time()
    # print('minimax use time: ' + str(eTime - sTime) + ' (s)')
    if self.trace.get() == 1:
        minimax.writeFile()
    self.var_command.set(command)
    self.isAI = True
    self.button_move()
    self.isAI = False
    if self.winTime != 0:
        end = self.winTime
    else:
        end = time.time()
    self.lable_time['text'] = 'AI using time: ' + "{:.2f}".format(end - start)
    self.winTime = 0

def play_with_human(self, save_as_file='TD_human_policy.dat'):
    ProgramDriver.load_model(save_as_file)
    env = Env()
    agents = [
        TD_agent(self.epsilon, self.alpha, self.decay_rate),
        HumanAgent(1),
    ]
    while True:
        env.reset()
        td_agent = agents[0]
        td_agent.decay_epsilon()
        env.render()
        while True:
            curr_qttt, mark = env.get_state()
            agent = ProgramDriver.get_agent_by_mark(agents, mark)
            free_qblock_id_lists, collapsed_qttts = env.get_valid_moves()
            collapsed_qttt, agent_move = agent.act(free_qblock_id_lists, collapsed_qttts, mark)
            # a None collapse choice signals the human wants to stop: save the model and exit
            if collapsed_qttt is None:
                ProgramDriver.save_model(save_as_file, 0, self.epsilon, self.alpha, self.decay_rate)
                print("Model saved.")
                sys.exit()
            next_qttt, next_round, reward, done = env.step(collapsed_qttt, agent_move, mark)
            print('')
            env.render()
            td_agent.bellman_backup(curr_qttt, next_qttt, reward, mark)
            if done:
                GameTree.set_state_value(next_qttt.get_state(), reward)
                next_qttt.show_result()
                break

def __init__(self, manticore=None):
    # user_account = m.create_account(balance=1000)
    # user_sol_account = m.solidity_create_contract(contract_src, owner=user_account)
    self.m = manticore
    self.contract_account = self.m.create_account(balance=1000)
    self.malicious_account = self.m.create_account(balance=1000)
    self.contract_sol_account = self.m.solidity_create_contract(contract_src, owner=self.contract_account)
    self.contract_sol_account._EVMContract__init_hashes()
    self.root = GameTree(self.m, self.contract_sol_account)
    self.symbolic_vars = {}
    self.z3_var_counter = 0
    self.z3_func = {}
    self.lp = LtlParser()

def iddfs(self):
    root = GameTree(self.startBoard)
    depth_val = 0
    ret_val = False
    # expanding from the initial configuration every time would loop forever:
    # possible_boards = self.startBoard.possibleBoards()
    # ret_val False -> search again with a larger depth; True -> a final board was found
    while not ret_val:
        # print "In while loop with depth::", depth_val
        ret_val = self.depth_ls(root, depth_val, self.startBoard)
        # increase depth after each iteration
        depth_val = depth_val + 1

def save_model(save_file, max_episode, epsilon, alpha, decay_rate):
    with open(save_file, 'wt') as f:
        # write model info
        info = dict(type="td", max_episode=max_episode, epsilon=epsilon,
                    alpha=alpha, decay_rate=decay_rate)
        f.write('{}\n'.format(json.dumps(info)))
        # write state values
        for state, value in GameTree.state_val.items():
            # if value != 0:
            vcnt = GameTree.get_state_cnt(state)
            f.write('{}\t{:0.3f}\t{}\n'.format(state, value, vcnt))

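# save_model/load_model above use a simple line-oriented format: a JSON header line
# followed by one "state<TAB>value<TAB>visit_count" line per state. A self-contained
# round-trip sketch of that format (illustrative only: plain dict instead of GameTree,
# ast.literal_eval instead of eval; function names here are not from the original code):
import ast
import json

def save_table(path, info, table):
    with open(path, 'wt') as f:
        f.write('{}\n'.format(json.dumps(info)))
        for state, (value, vcnt) in table.items():
            f.write('{}\t{:0.3f}\t{}\n'.format(state, value, vcnt))

def load_table(path):
    table = {}
    with open(path, 'rt') as f:
        info = json.loads(f.readline())
        for line in f:
            state, value, vcnt = line.rstrip('\n').split('\t')
            table[ast.literal_eval(state)] = (float(value), int(vcnt))
    return info, table

# example round trip
# save_table('demo.dat', {"type": "td"}, {(0, 1): (0.5, 3)})
# info, table = load_table('demo.dat')
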
def play(self, gb, parent):
    node_counter = 0
    if gb.finalBoard():
        found = True
    else:
        found = False
    nextBoards = gb.possibleBoards()
    # print "NextBoard Values in Play Class::", nextBoards
    for nextBoard in nextBoards:
        nextNode = GameTree(nextBoard)
        if self.play(nextBoard, nextNode):
            node_counter = node_counter + 1
            parent.addChild(nextNode)
    print("Number Of Nodes Expanded::", node_counter)
    return found

def dfs(self):
    # print "In dfs() of Play Class"
    # root node of type GameTree
    root = GameTree(self.startBoard)
    possible_boards = root.getGameBoard().possibleBoards()
    # print "Possible Board Values in Play Class::", possible_boards
    for nextBoard in possible_boards:
        nextNode = GameTree(nextBoard)
        # print "NextBoard Value in dfs() of Play Class::", nextBoard
        if self.play(nextBoard, nextNode):
            root.addChild(nextNode)

def astar(self, heuristic_val):
    # print "In astar() of Play Class"
    # root node of type GameTree
    root = GameTree(self.startBoard)
    # expand boards based on the heuristics
    possible_boards = root.getGameBoard().possibleBoards()
    # print "Possible Board Values in Play Class::", possible_boards
    for nextBoard in possible_boards:
        nextNode = GameTree(nextBoard)
        # print "NextBoard Value in astar() of Play Class::", nextBoard
        if self.play(nextBoard, nextNode):
            root.addChild(nextNode)

def depth_ls(self, parent, depth_val, gb):
    if depth_val >= 0:
        # print "depth_val>=0::", depth_val >= 0
        nextBoards = gb.possibleBoards()
        # print "nextBoard::", nextBoards
        for nextBoard in nextBoards:
            nextNode = GameTree(nextBoard)
            if self.depth_ls(nextNode, depth_val - 1, nextBoard):
                parent.addChild(nextNode)
            # early detection for final board
            if nextBoard.finalBoard():
                return True
    else:
        # print "depth_val>=0", depth_val >= 0
        # invalid depth lookup, so return
        # print "Accessing nodes at invalid depth so returning::"
        return False

from MetaGame import MetaGame
from GameTree import GameTree
import sys

# usage implied by the argument handling below:
#   <root-int>            -> build a depth-2 tree and print the best move
#   p <root-int>          -> print the MetaGame for <root-int>
#   a <depth> <root-int>  -> print the GameTree expanded to <depth> + 1
try:
    root = int(sys.argv[1])
    tree = GameTree(MetaGame(root), 2)
    print(tree.root().best_move(1000).get_meta_int())
except ValueError:
    mode = sys.argv[1]
    if mode == 'p':
        root = int(sys.argv[2])
        print(MetaGame(root))
    elif mode == 'a':
        depth = int(sys.argv[2])
        root = int(sys.argv[3])
        print(GameTree(MetaGame(root), depth + 1))

from .TableManager import Table
from .GameTree import GameTree, GameState
from .GameTable import GameTable
from .PrePostSim import PreSim, PostSim
from .PickPocket.MoveGenerator.Evaluate import EvalNode
from .PickPocket.utils.StopWatch import StopWatch
import Ipc.commands as commands
import json
import time

LookAheadDepth = 3

pipeline = PipeLine()
pipeline.open()
pipeline.WaitForArrival()

gametree = GameTree()
gametable = GameTable()
PreSimTask = PreSim(gametable=gametable)
VectorMath_StopWatch = StopWatch()


def RecvGreetings():
    for msg in pipeline.recvr.RecvAll():
        print(msg)


def CreatePreSimTask(move, table):
    def presim_task():
        gametable.ReserveTable(move)
        cmd = commands.EncodeSGState(table, move)
        pipeline.sender.Send(cmd)