コード例 #1
0
 def dp(self, player, labels):
     """Return the memoised value of the position described by ``labels``.

     The position is keyed by ``self.state_to_id(labels)``. On a cache miss
     every candidate move reported by ``model_api.get_candidates`` is played
     in place, the resulting position is evaluated recursively for the other
     side (``3 - player``), and ``self.mex`` of the collected child values is
     stored in ``self.sg``.

     ``labels`` is mutated while searching; each tried entry is set back to 0
     before the next move is tried.

     NOTE(review): the cache key is built from ``labels`` alone, so the side
     to move is presumably derivable from the position -- confirm.
     NOTE(review): ``3 - player`` presumably swaps players numbered 1 and 2.
     """
     state_key = self.state_to_id(labels)
     if state_key in self.sg:
         return self.sg[state_key]

     moves = model_api.get_candidates(player, labels, self.graph_mat)
     child_values = set()
     # ``or ()`` keeps the original guard: a falsy result means no moves.
     for move in moves or ():
         labels[move] = player
         child_values.add(self.dp(3 - player, labels))
         labels[move] = 0

     self.sg[state_key] = self.mex(child_values)
     return self.sg[state_key]
コード例 #2
0
 def forward(self, obs):
     """Choose a move using the precomputed ``self.sg`` table.

     Args:
         obs: a ``(player, labels)`` pair.

     Returns:
         The first candidate move after which the resulting position has a
         ``self.sg`` value of 0; if no candidate qualifies, a randomly
         sampled candidate; if there are no candidates at all, a random
         integer in ``[0, self.n - 1]``.

     ``labels`` is modified in place while probing, and each probed entry is
     set back to 0 afterwards.

     NOTE(review): ``self.sg`` is presumably filled beforehand for every
     reachable position; an unknown key would raise here -- confirm.
     """
     player, labels = obs
     moves = model_api.get_candidates(player, labels, self.graph_mat)
     if not moves:
         return random.randint(0, self.n - 1)

     for move in moves:
         # Play the move, look the position up, then undo the move.
         labels[move] = player
         leaves_zero_position = self.sg[self.state_to_id(labels)] == 0
         labels[move] = 0
         if leaves_zero_position:
             return move

     return random.sample(moves, 1)[0]
コード例 #3
0
 def forward(self, obs):
     """Pick the candidate move whose resulting position scores highest.

     Args:
         obs: a ``(player, labels)`` pair.

     Returns:
         The candidate with the largest ``self.get_value`` score (the first
         such candidate when ``np.argmax`` sees a tie), or a random integer
         in ``[0, self.n - 1]`` when there are no candidates.

     ``labels`` is modified in place while scoring, and each tried entry is
     set back to 0 afterwards.
     """
     player, labels = obs
     candidates = model_api.get_candidates(player, labels, self.graph_mat)
     if not candidates:
         return random.randint(0, self.n - 1)

     # Score every candidate exactly once, on the board where that candidate
     # (and only that candidate) has been played.  The previous version built
     # the whole list inside the loop with a comprehension whose own ``c``
     # never touched ``labels``, so all entries were the score of a single
     # board and only the last candidate's board survived.
     values = []
     for c in candidates:
         labels[c] = player
         values.append(
             self.get_value(player, labels, self.graph_mat, self.graph_dis))
         labels[c] = 0
     return candidates[np.argmax(values)]
コード例 #4
0
 def min_max_search(self, depth, player, labels):
     """Depth-limited negamax search from ``player``'s point of view.

     Args:
         depth: current search depth; the search recurses while
             ``depth < self.max_depth`` and otherwise scores the position
             with ``self.get_value``.
         player: the side to move; the other side is ``3 - player``.
         labels: the position, modified in place during the search with
             each tried entry set back to 0 afterwards.

     Returns:
         ``(score, move)`` for the best move found.  When there are no
         candidate moves the result is ``(-config.inf, None)``.
     """
     candidates = model_api.get_candidates(player, labels, self.graph_mat)
     if not candidates:
         return -config.inf, None

     max_score = -config.inf
     policy = None
     for c in candidates:
         labels[c] = player
         if depth < self.max_depth:
             # The child score is from the opponent's view, hence negated.
             score, _ = self.min_max_search(depth + 1, 3 - player, labels)
             score = -score
         else:
             score = self.get_value(player, labels, self.graph_mat,
                                    self.graph_dis)
         labels[c] = 0
         # ``policy is None`` makes sure a legal move is returned even when
         # every move scores -config.inf (a forced loss); a strict ``>``
         # alone would leave ``policy`` as None in that case.
         if policy is None or score > max_score:
             max_score = score
             policy = c
     return max_score, policy
コード例 #5
0
 def ab_search(self, depth, player, labels, history_max, history_min=None):
     """Depth-limited negamax search with alpha-beta pruning.

     Args:
         depth: current search depth; the search recurses while
             ``depth < self.max_depth`` and otherwise scores the position
             with ``self.get_value``.
         player: the side to move; the other side is ``3 - player``.
         labels: the position, modified in place during the search with
             each tried entry set back to 0 afterwards.
         history_max: upper bound (beta) in ``player``'s perspective; the
             search stops as soon as the best score reaches it.
         history_min: optional lower bound (alpha) in ``player``'s
             perspective, i.e. a score ``player`` is already guaranteed
             elsewhere.  ``None`` means no lower bound, which is what
             existing four-argument callers get.

     Returns:
         ``(score, move)`` for the best move found (possibly a bound when a
         cutoff happened).  When there are no candidate moves the result is
         ``(-config.inf, None)``.
     """
     candidates = model_api.get_candidates(player, labels, self.graph_mat)
     if not candidates:
         return -config.inf, None

     lower_bound = -config.inf if history_min is None else history_min
     max_score = -config.inf
     policy = None
     random.shuffle(candidates)
     for c in candidates:
         labels[c] = player
         if depth < self.max_depth:
             # Negamax window flip: the child's upper bound is the negation
             # of the best score this side can already count on, and its
             # lower bound is the negation of this node's upper bound.
             # Passing this node's own ``history_max`` down as the child's
             # upper bound (as before) compares scores from opposite
             # perspectives and can cut off the true best line.
             child_max = -max(lower_bound, max_score)
             score, _ = self.ab_search(depth + 1, 3 - player, labels,
                                       child_max, -history_max)
             score = -score
         else:
             score = self.get_value(player, labels, self.graph_mat,
                                    self.graph_dis)
         labels[c] = 0
         # ``policy is None`` makes sure a legal move is returned even when
         # every move scores -config.inf (a forced loss).
         if policy is None or score > max_score:
             max_score = score
             policy = c
         if max_score >= history_max:
             return max_score, policy
     return max_score, policy