def test_possible_moves(self):
    """Check ``game.possible_moves`` against three hand-encoded states."""
    # (encoded state, expected move list), evaluated in this order.
    cases = (
        (0, []),
        (0b111111111111111111111111111111111111111111000000000000000000000, []),
        (0b000000000000000000000000000000000000000000110110110110110110110,
         [0, 1, 2, 3, 4, 5, 6]),
    )
    for encoded, expected in cases:
        self.assertEqual(game.possible_moves(encoded), expected)
def test_possible_moves(self):
    """Verify the move lists ``game.possible_moves`` returns for three states."""
    no_moves = []
    every_column = list(range(7))

    moves = game.possible_moves(0)
    self.assertEqual(moves, no_moves)

    moves = game.possible_moves(
        0b111111111111111111111111111111111111111111000000000000000000000)
    self.assertEqual(moves, no_moves)

    moves = game.possible_moves(
        0b000000000000000000000000000000000000000000110110110110110110110)
    self.assertEqual(moves, every_column)
def find_leaf(self, state_int, player):
    """
    Walk down the tree from the root until the game ends or a leaf node is hit.

    :param state_int: encoded state of the root node
    :param player: player to move at the root
    :return: tuple of (value, leaf_state, player, states, actions)
        1. value: None for a non-terminal leaf, -1.0 if the last move won
           the game, 0.0 if no moves are left
        2. leaf_state: encoded state where the walk stopped
        3. player: player to move at that state
        4. states: states visited on the way (the stopping state excluded)
        5. actions: actions taken, one per visited state
    """
    visited = []
    taken = []
    node = state_int
    mover = player
    outcome = None

    while not self.is_leaf(node):
        visited.append(node)

        visit_counts = self.visit_count[node]
        sqrt_total = m.sqrt(sum(visit_counts))
        priors = self.probs[node]
        q_values = self.value_avg[node]

        # Only at the root: blend Dirichlet noise into the prior probabilities.
        if node == state_int:
            dirichlet = np.random.dirichlet([0.03] * game.GAME_COLS)
            priors = [0.75 * p + 0.25 * eps for p, eps in zip(priors, dirichlet)]

        # Per-action score: average value plus the exploration bonus.
        scores = []
        for q, p, n in zip(q_values, priors, visit_counts):
            scores.append(q + self.c_puct * p * sqrt_total / (1 + n))

        # Actions that are not possible in this state can never be selected.
        legal = set(game.possible_moves(node))
        for column in range(game.GAME_COLS):
            if column not in legal:
                scores[column] = -np.inf

        chosen = int(np.argmax(scores))
        taken.append(chosen)
        node, won = game.move(node, chosen, mover)
        if won:
            # The winning move was just played, so for the player who is now
            # to move the position is worth -1.
            outcome = -1.0
        mover = 1 - mover

        # Draw check: nobody has won and there is nothing left to play.
        no_moves_left = len(game.possible_moves(node)) == 0
        if no_moves_left and outcome is None:
            outcome = 0.0

    return outcome, node, mover, visited, taken
def find_leaf(self, state_int, player):
    """
    Descend from the root until a terminal position or an unexpanded node.

    :param state_int: root node state
    :param player: player to move at the root
    :return: tuple of (value, leaf_state, player, states, actions)
        1. value: None at a non-terminal leaf, otherwise the outcome for the
           player to move at the leaf (-1.0 after a winning move, 0.0 when
           no moves remain)
        2. leaf_state: state_int of the last state reached
        3. player: player to move at the leaf
        4. states: list of states traversed
        5. actions: list of actions taken
    """
    path_states, path_actions = [], []
    cur, to_move = state_int, player
    result = None

    while not self.is_leaf(cur):
        path_states.append(cur)
        n_visits = self.visit_count[cur]
        root_of_total = m.sqrt(sum(n_visits))
        prior = self.probs[cur]
        mean_value = self.value_avg[cur]

        if cur == state_int:
            # Exploration noise is mixed into the priors at the root only.
            noise = np.random.dirichlet([0.03] * game.GAME_COLS)
            prior = [0.75 * pr + 0.25 * nz for pr, nz in zip(prior, noise)]

        ucb = [
            avg + self.c_puct * pr * root_of_total / (1 + cnt)
            for avg, pr, cnt in zip(mean_value, prior, n_visits)
        ]

        # Rule out every column that is not a possible move here.
        allowed = game.possible_moves(cur)
        for col in set(range(game.GAME_COLS)).difference(allowed):
            ucb[col] = -np.inf

        best = int(np.argmax(ucb))
        path_actions.append(best)
        cur, won = game.move(cur, best, to_move)
        if won:
            # A win by the mover is a loss (-1) for whoever moves next.
            result = -1.0
        to_move = 1 - to_move

        # No winner and no move left means a draw.
        if result is None:
            if len(game.possible_moves(cur)) == 0:
                result = 0.0

    return result, cur, to_move, path_states, path_actions
def find_leaf(self, state_int, player, step):
    """
    Descend the search tree from the root until the game is decided or an
    unexpanded (leaf) node is reached.

    :param state_int: encoded state of the root node
    :param player: player to move at the root; a value below 1 selects the
        ``cho`` action tables, anything else the ``han`` tables
    :param step: move number at the root; incremented once per move played
        during the descent
    :return: tuple ``(value, leaf_state, step, states, actions)``
        1. value: ``None`` when the walk stopped at a leaf without a winner;
           otherwise -1.0 if the player who made the last move won and 1.0
           if the other player won, scaled by
           ``1 - (game.MAX_TURN - step) / 1000``
        2. leaf_state: encoded state where the walk stopped
        3. step: move number at that state
        4. states: states visited (the stopping state excluded)
        5. actions: action-table indices chosen, one per visited state
    """
    states = []
    actions = []
    cur_state = state_int
    cur_player = player
    value = None
    while value is None and not self.is_leaf(cur_state):
        states.append(cur_state)
        counts = self.visit_count[cur_state]
        total_sqrt = math.sqrt(sum(counts))
        probs = self.probs[cur_state]
        values_avg = self.value_avg[cur_state]
        movel = game.possible_moves(cur_state, cur_player, step)

        # No action has been taken yet only while we are still at the root;
        # that is where Dirichlet noise is mixed into the priors.
        at_root = not actions
        if at_root:
            noises = np.random.dirichlet([0.17] * len(movel))

        if cur_player < 1:
            chList, chDict = actionTable.choList, actionTable.choDict
        else:
            chList, chDict = actionTable.hanList, actionTable.hanDict

        # Score only the possible moves and remember the best table index.
        # NOTE(review): if no move scores above -inf (for example when movel
        # is empty) aidx keeps its value from the previous iteration, or is
        # unbound on the first one - confirm movel can never be empty.
        max_score = -np.inf
        for i, move in enumerate(movel):
            idx = chDict[move]
            if at_root:
                prior = 0.75 * probs[idx] + 0.25 * noises[i]
            else:
                prior = probs[idx]
            score = values_avg[idx] + self.c_puct * prior * total_sqrt / (1 + counts[idx])
            if score > max_score:
                max_score = score
                aidx = idx

        action = chList[aidx]
        actions.append(aidx)
        cur_state, won = game.move(cur_state, action, step)
        if won > 0:
            # won - 1 identifies the winning player; a win by the player who
            # just moved is worth -1 because the value is reported for the
            # side that is to move next.
            value = -1.0 if won - 1 == cur_player else 1.0
        cur_player = 1 - cur_player
        step += 1

    if value is not None:
        # Scale the outcome by the number of turns left before game.MAX_TURN.
        value *= 1 - (game.MAX_TURN - step) / 1000
    return value, cur_state, step, states, actions
# Fill the replay buffer from recorded games stored one JSON document per line
# in ./train.dat. The body of the outer loop continues beyond this fragment.
# NOTE(review): net.train() presumably switches the network to training mode -
# confirm against the network class.
net.train()
# Bounded deque: once REPLAY_BUFFER entries are stored the oldest are dropped.
replay_buffer = collections.deque(maxlen=REPLAY_BUFFER)
# NOTE(review): this file handle is never closed in the visible code.
f = open("./train.dat", "r")
ptime = time.time()
while True:
    # Read up to PLAY_EPISODES recorded games per outer iteration.
    for lidx in range(PLAY_EPISODES):
        pan = game.encode_lists([list(i) for i in game.INITIAL_STATE], 0)
        s = f.readline()
        # A line shorter than 5 characters ends the batch. lidx only becomes
        # negative when this happens on the first read of a batch, which the
        # check after the loop uses to leave the outer loop.
        if len(s)<5: lidx -= 1; break
        js = json.loads(s)
        # Start from the negated stored result; the sign is flipped after
        # every recorded move except the one with index 1.
        result = -js["result"]
        for idx, (action, probs) in enumerate(js["action"]):
            # Only the disabled check below uses movelist.
            movelist = game.possible_moves(pan, idx%2, idx)
            #if action not in movelist:
            #    print("Impossible action selected %d %d"%(step_idx, lidx))
            # Expand the sparse [index, probability] pairs into a dense vector.
            probs1 = [0.0] * actionTable.AllMoveLength
            for n in probs: probs1[n[0]] = n[1]
            replay_buffer.append((pan, idx, probs1, result))
            pan, _ = game.move(pan, action, idx)
            if idx!=1: result = -result
    # Nothing could be read for this batch: stop.
    if lidx < 0: break
    print(step_idx, end=' ')
    step_idx += 1
    # Skip the rest of the iteration until enough samples were collected.
    if len(replay_buffer) < MIN_REPLAY_TO_TRAIN: continue
def play_game(mcts_stores, replay_buffer, net1, net2, steps_before_tau_0,
              mcts_searches, mcts_batch_size, net1_plays_first=None, cuda=False):
    """
    Play a single game between two nets, optionally recording it for training.

    :param mcts_stores: None, one MCTS shared by both players, or a list with
        one MCTS per player
    :param replay_buffer: deque receiving (state, player, probs, result)
        tuples; nothing is stored when it is None
    :param net1: net playing as player 0
    :param net2: net playing as player 1
    :param steps_before_tau_0: number of steps after which tau becomes 0
    :param mcts_searches: forwarded to ``search_batch`` for every move
    :param mcts_batch_size: forwarded to ``search_batch`` for every move
    :param net1_plays_first: True/False fixes who starts, None picks at random
    :param cuda: forwarded to ``search_batch``
    :return: tuple (net1_result, step); net1_result is +1 if net1 won,
        -1 if it lost and 0 on a draw
    """
    assert isinstance(replay_buffer, (collections.deque, type(None)))
    assert isinstance(mcts_stores, (mcts.MCTS, type(None), list))
    assert isinstance(net1, Net)
    assert isinstance(net2, Net)
    assert isinstance(steps_before_tau_0, int) and steps_before_tau_0 >= 0
    assert isinstance(mcts_searches, int) and mcts_searches > 0
    assert isinstance(mcts_batch_size, int) and mcts_batch_size > 0

    # Normalise mcts_stores to one store per player.
    if isinstance(mcts_stores, mcts.MCTS):
        mcts_stores = [mcts_stores, mcts_stores]
    elif mcts_stores is None:
        mcts_stores = [mcts.MCTS(), mcts.MCTS()]

    board = game.INITIAL_STATE
    nets = [net1, net2]
    if net1_plays_first is None:
        mover = np.random.choice(2)
    elif net1_plays_first:
        mover = 0
    else:
        mover = 1

    step = 0
    tau = 1 if steps_before_tau_0 > 0 else 0
    history = []
    result = None
    net1_result = None

    while True:
        store = mcts_stores[mover]
        store.search_batch(mcts_searches, mcts_batch_size, board,
                           mover, nets[mover], cuda=cuda)
        probs, _ = store.get_policy_value(board, tau=tau)
        history.append((board, mover, probs))
        action = np.random.choice(game.GAME_COLS, p=probs)
        if action not in game.possible_moves(board):
            print("Impossible action selected")
        board, won = game.move(board, action, mover)
        if won:
            # result is expressed for the player who made the last move.
            result = 1
            net1_result = 1 if mover == 0 else -1
            break
        mover = 1 - mover
        # No moves left without a winner: draw.
        if len(game.possible_moves(board)) == 0:
            result = 0
            net1_result = 0
            break
        step += 1
        if step >= steps_before_tau_0:
            tau = 0

    if replay_buffer is not None:
        # Walk the game backwards, flipping the sign at every position.
        target = result
        for past_state, past_player, past_probs in reversed(history):
            replay_buffer.append((past_state, past_player, past_probs, target))
            target = -target

    return net1_result, step
def play_game(net1, steps_before_tau_0, mcts_batch_size, device="cpu"):
    """
    Run one interactive console game between a human and ``net1``.

    The human first picks a side, then both sides pick an opening formation
    (steps 0 and 1), after which moves alternate until ``game.move`` reports
    a winner or the human types ``new``. Nothing is returned.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source - confirm it against the original file.

    :param net1: network used for the computer's moves
    :param steps_before_tau_0: while ``step`` is below this value the
        computer samples its move from the policy, afterwards it takes argmax
    :param mcts_batch_size: forwarded to ``search_batch``
    :param device: forwarded to ``search_batch``
    """
    assert isinstance(net1, model.Net)
    assert isinstance(steps_before_tau_0, int) and steps_before_tau_0 >= 0
    assert isinstance(mcts_batch_size, int) and mcts_batch_size > 0
    # The search budget is a module-level setting the "level" command changes.
    global mcts_searches
    pan = game.encode_lists([list(i) for i in game.INITIAL_STATE], 0)
    # Every state reached so far, used for undo and repetition checks.
    historystr = []
    cur_player = 0
    step = 0
    mctsi = mcts.MCTS()
    result = None; exitf = False
    # Character code of '1', the base for converting coordinate digits.
    a0 = ord('1')
    # Side selection (prompt: "choose the side to play: 0) Cho, 1) Han").
    # Input containing "level" adjusts the search budget instead.
    while True:
        s=input('플레이하려는 진영을 선택하세요 0) 초, 1)한 ?')
        if s.find('level') >= 0:
            mcts_searches = LEVELC * int(s[6:])
            print('OK', flush=True)
        else:
            player_human = 0 if int(s)<1 else 1
            break
    # result is never assigned below; the loop ends through break.
    while result is None:
        movelist = game.possible_moves(pan, cur_player, step)
        # Repetition guard: when the 4th-last and 8th-last stored states agree
        # in their first 90 characters, remove the one move that would
        # recreate the 4th-last state.
        if step>9 and historystr[-4][:90]==historystr[-8][:90]:
            p = game.decode_binary(pan)
            for idx, m in enumerate(movelist):
                # A move is encoded as source * 100 + target, each a cell
                # index on a 9-column board.
                spos = m // 100; tpos = m % 100; y0 = spos // 9; x0 = spos % 9; y1 = tpos // 9; x1 = tpos % 9
                # Try the move on the decoded board ...
                captured = p[y1][x1]; p[y1][x1] = p[y0][x0]; p[y0][x0] = 0
                ps = game.encode_lists(p, step+1)
                if ps[:90]==historystr[-4][:90]:
                    # Safe to delete while enumerating: the loop ends here.
                    del movelist[idx]; break
                # ... and take it back before trying the next one.
                p[y0][x0] = p[y1][x1]; p[y1][x1] = captured
        # Human input: during the two formation steps when cur_player differs
        # from the human's side, afterwards when it equals the human's side.
        if (step<2 and cur_player != player_human) or (step>1 and cur_player == player_human):
            if step < 2:
                # Prompt: "choose the horse/elephant formation".
                print("마상 차림을 선택하세요 0) "+masang[0]+", 1) "+masang[1]+", 2) "+masang[2]+", 3) "+masang[3])
            else:
                render(pan, player_human)
                if step==2 or step==3:
                    print("")
                    # Help text: enter source row, source column, target row,
                    # target column (e.g. 0010); a single 0 passes the turn.
                    print("옮기고자 하는 기물의 세로 번호, 가로 번호, 목적지의 세로 번호, 가로 번호 ex) 0010 한수 쉬기: 0")
            # Keep asking until a usable action (>= 0) was entered.
            action = -1
            while action<0:
                s=input((str(step-1) if step>1 else '')+' ? ')
                # "new" abandons the game.
                if s=="new": exitf=True; break
                elif s.find('level')>=0: mcts_searches=LEVELC*int(s[6:]); print('OK', flush=True)
                elif step<2:
                    # Formation choice 0-3 is encoded as 10000 + choice.
                    if len(s)==1 and s[0]>='0' and s[0]<'4': action = int(s) + 10000
                # Any single character passes the turn (action 0).
                elif len(s)==1: action = 0
                elif s=='undo' and step>3:
                    # Go back two steps and restore the state stored before them.
                    step-=2; historystr.pop(); historystr.pop(); pan=historystr[-1]
                    movelist = game.possible_moves(pan, cur_player, step)
                    render(pan, player_human)
                elif len(s)==4 and s[0]>='0' and s[0]<='9' and s[1]>'0' and s[1]<='9' and s[2]>='0' and s[2]<='9' and s[3]>'0' and s[3]<='9':
                    # Row digits '1'..'9' map to 9..1 and '0' to 0; column
                    # digits '1'..'9' map to 0..8. Both are mirrored when the
                    # human plays side 0.
                    b1=9-ord(s[0])+a0 if s[0]>'0' else 0
                    if player_human<1: b1=9-b1
                    b2 = ord(s[1]) - a0
                    if player_human < 1: b2 = 8 - b2
                    b3 = 9-ord(s[2]) + a0 if s[2]>'0' else 0
                    if player_human < 1: b3 = 9 - b3
                    b4 = ord(s[3]) - a0
                    if player_human < 1: b4 = 8 - b4
                    action = (b1*9 + b2)*100 + b3*9+b4
                    # Moves outside movelist are rejected and asked for again.
                    if action not in movelist: action = -1
                    else: print('OK', flush=True)
        else:
            # Computer's turn: search, then sample or take the best move.
            mctsi.search_batch(mcts_searches, mcts_batch_size, pan, cur_player, net1, step, device=device)
            probs, values = mctsi.get_policy_value(pan, movelist, cur_player)
            chList = actionTable.choList if cur_player < 1 else actionTable.hanList
            n = np.random.choice(actionTable.AllMoveLength, p=probs) if step<steps_before_tau_0 else np.argmax(probs)
            action = chList[n]
            # The string statement below is disabled debugging code.
            """for m in movelist: print('%04d %.2f' % (m, probs[chList.index(m)]), end=', ') print()"""
            if step<2:
                # Announce the chosen formation together with its value.
                print(('한: ' if step<1 else '초: ')+masang[action-10000]+' '+str(values[n]), flush=True)
                if step==1: render(pan, player_human)
            else:
                # Action below 1 is a pass (message: "turn passed").
                if action<1: print('한수쉼'+' '+str(values[n]))
                else:
                    # Convert the move back to the digit notation used for input.
                    b1=action//100//9
                    if player_human<1: b1=9-b1
                    b2 = action//100%9
                    if player_human < 1: b2 = 8 - b2
                    b3 = action%100//9
                    if player_human < 1: b3 = 9 - b3
                    b4 = action%100%9
                    if player_human < 1: b4 = 8 - b4
                    print((chr(9-b1+a0) if b1>0 else '0')+chr(b2+a0)+(chr(9-b3+a0) if b3>0 else '0')+chr(b4+a0)+' '+str(values[n]))
        if exitf: break
        pan, won = game.move(pan, action, step)
        historystr.append(pan)
        if won>0:
            render(pan, player_human)
            # won == 1 announces Cho as the winner, any other value Han.
            print(('초' if won==1 else '한')+' 승')
            break
        cur_player = 1-cur_player
        step += 1
def is_draw(self):
    """Return True when no move is possible in the current state."""
    remaining_moves = game.possible_moves(self.state)
    return len(remaining_moves) == 0
def is_valid_move(self, move_col):
    """Return True when ``move_col`` is among the moves possible in the current state."""
    allowed = game.possible_moves(self.state)
    return move_col in allowed
def play_game(value, mcts_stores, queue, net1, net2, steps_before_tau_0, mcts_searches,
              mcts_batch_size, best_idx, url=None, username=None, device="cpu"):
    """
    Play one game between two nets, then queue its positions or upload its record.

    :param value: None, or an indexable whose first element is checked before
        every move; the game is abandoned once ``value[0]`` is not positive
        (presumably a shared stop flag - confirm with the caller)
    :param mcts_stores: None (fresh stores are created), a single MCTS shared
        by both players, or a list of two stores; existing stores are cleared
    :param queue: None, a ``collections.deque`` (filled with ``append``) or
        an object offering ``put``; receives (state, step, probs, result)
    :param net1: net playing as player 0
    :param net2: net playing as player 1
    :param steps_before_tau_0: number of steps after which tau becomes 0
    :param mcts_searches: forwarded to ``search_batch``
    :param mcts_batch_size: forwarded to ``search_batch``
    :param best_idx: when ``queue`` is None and this is >= 0 the finished
        game is uploaded, tagged with this value as ``netIdx``
    :param url: upload target passed to ``webFunction.http_request``
    :param username: stored in the uploaded record
    :param device: forwarded to ``search_batch``
    :return: tuple (net1_result, step); net1_result is 1 when ``game.move``
        reported ``won == 1`` and -1 for any other winner. An abandoned game
        returns (None, 0).
    """
    assert isinstance(mcts_stores, (mcts.MCTS, type(None), list))
    assert isinstance(net1, Net)
    assert isinstance(net2, Net)
    assert isinstance(steps_before_tau_0, int) and steps_before_tau_0 >= 0
    assert isinstance(mcts_searches, int) and mcts_searches > 0
    assert isinstance(mcts_batch_size, int) and mcts_batch_size > 0

    # Normalise mcts_stores to one cleared store per player.
    if mcts_stores is None:
        mcts_stores = [mcts.MCTS(), mcts.MCTS()]
    elif isinstance(mcts_stores, mcts.MCTS):
        mcts_stores.clear()
        mcts_stores = [mcts_stores, mcts_stores]
    else:
        mcts_stores[0].clear()
        mcts_stores[1].clear()

    state = game.encode_lists([list(i) for i in game.INITIAL_STATE], 0)
    nets = [net1, net2]
    cur_player = 0
    step = 0
    tau = 1 if steps_before_tau_0 > 0 else 0
    game_history = []
    net1_result = None
    result = None

    while net1_result is None and (value is None or value[0] > 0):
        mcts_stores[cur_player].search_batch(mcts_searches, mcts_batch_size, state,
                                             cur_player, nets[cur_player], step, device=device)
        movel = game.possible_moves(state, cur_player, step)
        probs, _ = mcts_stores[cur_player].get_policy_value(state, movel, cur_player, tau=tau)
        chList = actionTable.choList if cur_player < 1 else actionTable.hanList
        action = chList[np.random.choice(actionTable.AllMoveLength, p=probs)]
        # Upload mode keeps the action played, queue mode keeps the position.
        game_history.append((action, probs) if queue is None else (state, step, probs))
        if action not in movel:
            print("Impossible action selected")
        state, won = game.move(state, action, step)
        # Progress indicator: one dot every third step.
        if step % 3 < 1:
            print('.', end='', flush=True)
        if won > 0:
            net1_result = 1 if won == 1 else -1
            result = -net1_result
            break
        step += 1
        cur_player = 1 - cur_player
        if step >= steps_before_tau_0:
            tau = 0

    if net1_result is not None:
        # Finish the line of progress dots.
        print()
        if queue is not None:
            push = queue.append if isinstance(queue, collections.deque) else queue.put
            # The sign is flipped after every position except the one
            # recorded at step 1.
            for past_state, hstep, past_probs in game_history:
                push((past_state, hstep, past_probs, result))
                if hstep != 1:
                    result = -result
        elif best_idx >= 0:
            # Store only the non-zero probabilities as [index, prob] pairs.
            gh = []
            for past_action, past_probs in game_history:
                prar = [[idx, prob] for idx, prob in enumerate(past_probs) if prob > 0]
                gh.append((past_action, prar))
            js = {"netIdx": best_idx, "result": net1_result, "username": username, "action": gh}
            hr = webFunction.http_request(url, True, json.dumps(js))
            if hr is None:
                # No response at all: terminate the process.
                sys.exit()
            elif hr['status'] == 'error':
                print('error occured')
            else:
                print("game is uploaded")

    return net1_result, step if net1_result is not None else 0
def play_game(mcts_stores, replay_buffer, net1, net2, steps_before_tau_0,
              mcts_searches, mcts_batch_size, net1_plays_first=None, device="cpu"):
    """
    Play one game between two nets and optionally store it in the replay buffer.

    :param mcts_stores: None, a single MCTS used by both players, or a list
        holding one MCTS per player
    :param replay_buffer: deque that receives (state, player, probs, result)
        tuples, or None to store nothing
    :param net1: net playing as player 0
    :param net2: net playing as player 1
    :param steps_before_tau_0: number of steps after which tau becomes 0
    :param mcts_searches: forwarded to ``search_batch`` for every move
    :param mcts_batch_size: forwarded to ``search_batch`` for every move
    :param net1_plays_first: True/False fixes the starting player, None
        chooses it at random
    :param device: forwarded to ``search_batch``
    :return: tuple (net1_result, step); net1_result is +1 if net1 won,
        -1 if it lost, 0 on a draw
    """
    assert isinstance(replay_buffer, (collections.deque, type(None)))
    assert isinstance(mcts_stores, (mcts.MCTS, type(None), list))
    assert isinstance(net1, Net)
    assert isinstance(net2, Net)
    assert isinstance(steps_before_tau_0, int) and steps_before_tau_0 >= 0
    assert isinstance(mcts_searches, int) and mcts_searches > 0
    assert isinstance(mcts_batch_size, int) and mcts_batch_size > 0

    # Make sure there is one store per player.
    if mcts_stores is None:
        mcts_stores = [mcts.MCTS(), mcts.MCTS()]
    elif isinstance(mcts_stores, mcts.MCTS):
        mcts_stores = [mcts_stores] * 2

    position = game.INITIAL_STATE
    nets = [net1, net2]
    if net1_plays_first is None:
        to_move = np.random.choice(2)
    else:
        to_move = 0 if net1_plays_first else 1

    step = 0
    tau = 1 if steps_before_tau_0 > 0 else 0
    transcript = []
    result = None
    net1_result = None

    while result is None:
        searcher = mcts_stores[to_move]
        searcher.search_batch(mcts_searches, mcts_batch_size, position,
                              to_move, nets[to_move], device=device)
        probs, _ = searcher.get_policy_value(position, tau=tau)
        transcript.append((position, to_move, probs))
        action = np.random.choice(game.GAME_COLS, p=probs)
        if action not in game.possible_moves(position):
            print("Impossible action selected")
        position, won = game.move(position, action, to_move)

        if won:
            # result is expressed for the player who made the last move.
            result = 1
            net1_result = 1 if to_move == 0 else -1
            break

        to_move = 1 - to_move

        # Nobody won and no move is left: the game is a draw.
        if len(game.possible_moves(position)) == 0:
            result = 0
            net1_result = 0
            break

        step += 1
        if step >= steps_before_tau_0:
            tau = 0

    if replay_buffer is not None:
        # Go through the game backwards, flipping the sign at every position.
        signed = result
        for old_state, old_player, old_probs in reversed(transcript):
            replay_buffer.append((old_state, old_player, old_probs, signed))
            signed = -signed

    return net1_result, step