/
mcts.py
183 lines (156 loc) · 6.71 KB
/
mcts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import state
import agent
import random
import game
import dummyAgent
import cards
import time, sys
from collections import Counter
from math import sqrt, log
# constant used to gauge level of exploration in node selection
c = 10
# time (in seconds) MCTS is allowed to run per sampled hand
budget = 1
"""
Monte Carlo Tree Search:
Includes modules for game tree as well as the agent class
"""
class mctsNode(state.State):
    """
    Node in the MCTS game tree.

    Attributes:
        lastMove: action taken from the parent node to reach this node
        depth: depth of this node in the tree (used for testing purposes)
        visits: number of times this node has been visited (starts at 1)
        score: accumulated simulation results backed up through this node
        children: list of expanded child nodes
        parent: parent node (None for the root)
        hands: list of hand dictionaries for each player, from sampling
        idx: index of the MCTS agent in the game
        terminal: True when the agent's hand is empty at this node
        turn: True when the agent itself is the player to move here
    """
    def __init__(self, playedCards, whosTurn, hands, idx, lastMove, depth,
                 topCard=None, lastPlayed=None, finished=None, parent=None,
                 score=0.):
        # Avoid the shared-mutable-default pitfall: each node created
        # without an explicit `finished` gets its own fresh list.
        if finished is None:
            finished = []
        self.lastMove = lastMove
        self.depth = depth
        self.visits = 1.
        self.score = score
        self.children = []
        self.parent = parent
        self.hands = hands
        self.idx = idx
        # The agent has emptied its hand at this node.
        self.terminal = cards.empty(hands[self.idx])
        # Whether it is the agent's own turn to move at this node.
        self.turn = (self.idx == whosTurn)
        super(mctsNode, self).__init__(playedCards, whosTurn,
                                       topCard, lastPlayed, finished)

    def addChild(self, action):
        """Expand this node with the child reached by taking `action`."""
        # Get the successor state from the state module.
        curr_state = state.State(self.playedCards, self.whosTurn,
                                 self.topCard, self.lastPlayed, self.finished)
        newState = curr_state.getChild(action)
        # Shallow-copy the hands list; only the mover's hand may change.
        newHands = list(self.hands)
        if action != agent.PASS:
            # Remove the played cards from the mover's hand, copying the
            # dict first so the parent's sampled hand is not mutated.
            player_hand = dict(self.hands[self.whosTurn])
            newHands[self.whosTurn] = cards.diff(player_hand,
                                                 {action[1]: action[0]})
        score = 0.
        # If the agent got rid of all its cards, seed the score with its
        # finishing position (1 for first, 1/2 for second, ...).
        if self.idx in newState.finished:
            score = (newState.finished.index(self.idx) + 1) ** -1
        newNode = mctsNode(newState.playedCards, newState.whosTurn, newHands,
                           self.idx, action, self.depth + 1, newState.topCard,
                           newState.lastPlayed, newState.finished, self, score)
        self.children.append(newNode)

    def addAllChildren(self, actions):
        """Expand this node with one child per action in `actions`."""
        for action in actions:
            self.addChild(action)
"""
Agent Class for MCTS agent. Contains functions for each major stage of the
algorithm.
"""
class mctsAgent(agent.Agent):
    """
    MCTS agent: repeatedly samples possible opponent hands, grows a search
    tree within a fixed time budget for each sample, and plays the action
    chosen most often across samples.
    """
    def __init__(self, idx, hand):
        super(mctsAgent, self).__init__(idx, hand)

    def bestChild(self, children):
        """
        Return the child maximizing the UCT value
        score/visits + c * sqrt(log(parent.visits) / (visits + 1)).
        """
        sorted_children = sorted(
            children,
            key=lambda child: child.score / child.visits
            + c * sqrt(log(child.parent.visits) / (child.visits + 1)))
        # Stable sort: among ties, the latest-added child wins,
        # matching the original selection behavior.
        return sorted_children[-1]

    def selection(self, root):
        """
        Tree policy: descend via UCT until reaching an unexpanded or
        terminal node, expanding the tree as needed; return that node.
        """
        numDone = len(root.finished)
        if not root.children:
            # The player to move has no cards but the game isn't finished:
            # the only possible move is PASS.
            if (not root.turn) and cards.empty(root.hands[root.whosTurn]) \
                    and root.numPlayers > numDone:
                root.addChild(agent.PASS)
                return self.selection(root.children[0])
            # Terminal: the agent has finished, or the game is over.
            elif (root.turn and root.terminal) or root.isFinalState():
                return root
            # Expansion: add all legal children, then pick one at random.
            else:
                curr_state = state.State(root.playedCards, root.whosTurn,
                                         root.topCard, root.lastPlayed,
                                         root.finished)
                testagent = agent.Agent(root.whosTurn,
                                        root.hands[root.whosTurn])
                root.addAllChildren(testagent.getAllActions(curr_state))
                return random.choice(root.children)
        # Already expanded: recurse into the best UCT child.
        return self.selection(self.bestChild(root.children))

    def simulation(self, node):
        """
        Play out the game from `node` using the default (dummy) policy and
        return a (normalized) score for the node; higher = better finish.
        """
        # If the agent's hand is already empty, its finishing position was
        # recorded in node.score when the node was expanded.
        if cards.empty(node.hands[self.idx]):
            return node.score / node.visits
        # If every other player has finished, the agent comes in last.
        emptyCount = 0
        for i in range(node.numPlayers):
            if cards.empty(node.hands[i]):
                emptyCount += 1
        if emptyCount == node.numPlayers - 1:
            return (node.numPlayers + 1) ** -1
        # Roll out the remainder of the game with dummy agents.
        # NOTE(review): agent *classes* are passed here, not instances —
        # presumably game.Game instantiates them; confirm against game.py.
        agents = [dummyAgent.DummyAgent for i in range(node.numPlayers)]
        gm = game.Game(agents, node.hands, node.playedCards, node.whosTurn,
                       node.topCard, node.lastPlayed, node.finished)
        results = gm.playGame()
        return ((results.index(self.idx) + 1) ** -1) / node.visits

    def backpropagation(self, node, result):
        """
        Add `result` to every node on the path from `node` up to the root,
        incrementing each node's visit count along the way.
        """
        node.visits += 1
        node.score += result
        if node.parent is not None:
            self.backpropagation(node.parent, result)

    def makeMove(self, state):
        """
        Choose an action for the given state.

        Runs `x` independent determinizations: in each, the unseen cards
        are dealt to opponents, a tree is grown for `budget` seconds, and
        the best root action is recorded.  Returns the action chosen most
        often across samples.
        """
        # If there is just one legal action (PASS), skip the computation.
        actions = self.getAllActions(state)
        if len(actions) == 1:
            return actions[0]
        res_actions = []
        x = 10  # number of hand samplings
        for i in range(x):
            time_start = time.time()
            # Cards unseen by this agent: everything minus the cards
            # already played and the agent's own hand.
            cardsLeft = cards.diff(cards.allCards(),
                                   [state.playedCards, self.hand])
            otherRemaining = list(state.numRemaining)
            del otherRemaining[self.idx]
            hands = cards.dealHands(cardsLeft, otherRemaining)
            hands.insert(self.idx, dict(self.hand))
            root = mctsNode(state.playedCards, self.idx, hands, self.idx,
                            None, 0, state.topCard, state.lastPlayed,
                            state.finished)
            # Grow the tree until this sample's time budget is exhausted.
            while time.time() < time_start + budget:
                nextNode = self.selection(root)
                result = self.simulation(nextNode)
                self.backpropagation(nextNode, result)
            # Record the root action with the highest mean score.
            best = sorted(root.children,
                          key=lambda child: child.score / child.visits)[-1]
            res_actions.append(best.lastMove)
        # Play the action that won the most samples.
        return Counter(res_actions).most_common()[0][0]