def think(state):
    """Pick a move by averaging the final score of greedy playouts.

    Every legal move is sampled ``ROLLOUTS`` times.  For each sample the
    candidate move is applied to a fresh copy of ``state`` and the rest of
    the game is played out with ``greedo.think`` choosing every move.  The
    move with the highest mean final score for the player to move wins.

    Args:
        state: Game state providing ``get_moves``, ``get_whose_turn``,
            ``copy``, ``apply_move``, ``is_terminal`` and ``get_score``.

    Returns:
        The move with the best mean playout score.  If no candidate is
        evaluated (``get_moves`` is empty), the move proposed by
        ``greedo.think(state)`` is returned.
    """
    moves = state.get_moves()
    best_move = greedo.think(state)  # fallback if nothing beats -inf
    best_expectation = float("-inf")
    me = state.get_whose_turn()

    def rollout(playout_state):
        """Finish the game in place with greedy moves; return my score."""
        while not playout_state.is_terminal():
            playout_state.apply_move(greedo.think(playout_state))
        return playout_state.get_score(me)

    for move in moves:
        total_score = 0.0
        for _ in range(ROLLOUTS):
            rollout_state = state.copy()
            rollout_state.apply_move(move)
            # Exactly one playout per copy: a playout consumes the state,
            # so scoring the same copy again only repeats its final score
            # and inflates the total by a factor of ROLLOUTS.
            total_score += rollout(rollout_state)

        expectation = total_score / ROLLOUTS
        if expectation > best_expectation:
            best_expectation = expectation
            best_move = move

    print("{} rollsout with move {} with expected score {}".format(
        me, best_move, best_expectation))
    return best_move
def think(state):
    """Pick a move using depth-limited greedy playouts.

    Each legal move is sampled ``ROLLOUT`` times.  A sample applies the
    candidate move to a copy of ``state`` and then lets ``greedo.think``
    choose up to ``DEPTH`` further moves, stopping early if the game ends.
    The score of the resulting position for the player to move is summed
    over all samples of that move.

    Args:
        state: Game state providing ``get_moves``, ``get_whose_turn``,
            ``copy``, ``apply_move``, ``is_terminal`` and ``get_score``.

    Returns:
        The move with the largest summed score.  Ties go to the move that
        appears later in ``get_moves()``.

    Raises:
        IndexError: If ``state.get_moves()`` is empty.
    """
    me = state.get_whose_turn()
    moves = state.get_moves()
    best_move = moves[0]
    best_score = float("-inf")

    for candidate in moves:
        total_score = 0.0
        for _ in range(ROLLOUT):
            rollout_state = state.copy()
            rollout_state.apply_move(candidate)
            for _ in range(DEPTH):
                if rollout_state.is_terminal():
                    break
                rollout_state.apply_move(greedo.think(rollout_state))
            total_score += rollout_state.get_score(me)

        # Every move gets the same number of samples, so comparing sums
        # ranks moves the same way as comparing means.  `>=` keeps the
        # existing tie-break (later move wins).
        if total_score >= best_score:
            best_score = total_score
            best_move = candidate

    return best_move
def think(state):
    """Choose a move with Monte Carlo tree search and greedy playouts.

    Runs a fixed budget of 1000 iterations.  Each iteration walks down the
    tree with ``Node.UCTSelectChild`` while the current node has no untried
    moves but does have children, expands one randomly chosen untried move
    (if any), finishes the game with ``greedo.think`` choosing every move,
    and then calls ``update`` on every node from the reached node back to
    the root.

    Args:
        state: Game state providing ``copy``, ``apply_move``,
            ``get_whose_turn``, ``get_moves`` and ``get_score``.

    Returns:
        The ``move`` of the root child with the highest ``visits`` count.

    Raises:
        IndexError: If the root ends up with no children.
    """
    rootnode = Node(state, None, None)

    for _ in range(1000):  # fixed search budget
        node = rootnode
        test_state = state.copy()
        # Player who made the move leading to `node`.  Tracked on every
        # step so it is always defined, even when the iteration ends on a
        # node with nothing to expand (previously unbound or stale).
        current_player = test_state.get_whose_turn()

        # Select: descend while the node is fully expanded and has children.
        while not node.untried_moves and node.children:
            node = node.UCTSelectChild()
            current_player = test_state.get_whose_turn()
            test_state.apply_move(node.move)

        # Expand: try one untried move, if the node has any left.
        if node.untried_moves:
            m = random.choice(node.untried_moves)
            current_player = test_state.get_whose_turn()
            test_state.apply_move(m)
            node = node.addChild(test_state, m, current_player)

        # Simulate: greedy playout until no moves remain.
        while test_state.get_moves():
            test_state.apply_move(greedo.think(test_state))

        # Backpropagate the final score along the path to the root.
        # NOTE(review): every ancestor receives the score of the same
        # player (the one who made the last tree move); confirm that
        # Node.update / UCTSelectChild expect that perspective.
        score = test_state.get_score(current_player)
        while node is not None:
            node.update(score)
            node = node.parent

    # Most-visited child; the sort keeps the existing tie-break
    # (the later child wins on equal visit counts).
    return sorted(rootnode.children, key=lambda c: c.visits)[-1].move
def think(state):
    """Choose a move with Monte Carlo tree search and greedy playouts.

    Runs ``ITERMAX`` iterations.  Each iteration selects down the tree with
    ``Node.uct_select_child``, expands one randomly chosen untried move (if
    any), plays the game out with ``greedo.think`` until it is terminal,
    and then adds the final score to every node on the path back to the
    root.  Each node is credited with the score of its own ``who``.

    Args:
        state: Game state providing ``copy``, ``apply_move``,
            ``get_whose_turn``, ``is_terminal`` and ``get_score``.

    Returns:
        The ``move`` of the root child with the highest average reward
        (``reward / times_visited``).

    Raises:
        ValueError: If the root has no children when the search ends
            (``max`` over an empty sequence).
    """
    # NOTE(review): the player to move is passed as `last_move`; presumably
    # Node stores it as `who` -- confirm against the Node class.
    rootnode = Node(state=state, last_move=state.get_whose_turn())
    rootnode.times_visited = 0.0

    for _ in range(ITERMAX):
        node = rootnode
        next_state = state.copy()

        # Selection: descend while every move has been tried and the node
        # still has children to choose from.
        while not node.untried_moves and node.child_nodes:
            node = node.uct_select_child()
            next_state.apply_move(node.move)

        # Expansion: add one child for a randomly chosen untried move.
        if node.untried_moves:
            move = random.choice(node.untried_moves)
            turn = next_state.get_whose_turn()
            next_state.apply_move(move)
            node = node.add_child(move, next_state, last_move=turn)

        # Rollout: greedy playout to the end of the game.
        while not next_state.is_terminal():
            next_state.apply_move(greedo.think(next_state))

        # Backpropagation: the score is looked up per node because each
        # node is rewarded from the point of view of its own `who`.
        while node is not None:
            node.times_visited += 1
            node.reward += next_state.get_score(node.who)
            node = node.parent

    chosen_node = max(rootnode.child_nodes,
                      key=lambda c: c.reward / c.times_visited)
    return chosen_node.move
def rollout(state):
    """Play ``state`` to the end with greedy moves and return its score.

    ``state`` is advanced in place: while it is not terminal, the move
    returned by ``greedo.think`` is applied.  The result is
    ``state.get_score(me)`` for the finished game.
    """
    # NOTE(review): `me` is neither a parameter nor a local here; it must
    # come from an enclosing or module scope -- confirm it is defined
    # wherever this function is used.
    while True:
        if state.is_terminal():
            return state.get_score(me)
        state.apply_move(greedo.think(state))