def dp(self, player, labels):
    """Return the memoised value of the position ``labels``, computing it on demand.

    The value of a position is ``self.mex`` applied to the set of values of
    every position reachable by one move of ``player``.  Results are stored
    in ``self.sg`` under ``self.state_to_id(labels)``; note that the cache
    key is derived from ``labels`` only, not from ``player``.

    ``labels`` is modified while exploring, but every trial move is reset
    to 0 before the next one is tried.

    NOTE(review): ``sg``/``mex`` look like Sprague-Grundy values and the
    minimum-excludant function, and ``3 - player`` assumes the two players
    are numbered 1 and 2 -- confirm against the rest of the class.
    """
    key = self.state_to_id(labels)
    if key in self.sg:
        return self.sg[key]

    moves = model_api.get_candidates(player, labels, self.graph_mat)
    reachable = set()
    if moves:
        for move in moves:
            labels[move] = player  # play the move ...
            reachable.add(self.dp(3 - player, labels))
            labels[move] = 0  # ... and take it back
    self.sg[key] = self.mex(reachable)
    return self.sg[key]
def forward(self, obs):
    """Choose a move for the observation ``obs = (player, labels)``.

    Each candidate move is tried in turn; the first one that leads to a
    position whose entry in ``self.sg`` is 0 is returned.  If no such move
    exists a candidate is drawn at random, and if there are no candidates
    at all a random integer in ``[0, self.n - 1]`` is returned.

    ``labels`` is restored after every trial move.  The lookup assumes the
    resulting position is already present in ``self.sg``.

    NOTE(review): an ``sg`` value of 0 presumably marks a losing position
    for the opponent -- confirm against how ``self.sg`` is populated.
    """
    player, labels = obs
    moves = model_api.get_candidates(player, labels, self.graph_mat)
    if not moves:
        return random.randint(0, self.n - 1)

    for move in moves:
        labels[move] = player
        leads_to_zero = self.sg[self.state_to_id(labels)] == 0
        labels[move] = 0
        if leads_to_zero:
            return move
    return random.sample(moves, 1)[0]
def forward(self, obs):
    """Choose the candidate move with the highest one-step evaluation.

    ``obs`` is a ``(player, labels)`` pair.  Every candidate is played on
    ``labels`` in turn, scored with ``self.get_value`` and taken back, so
    each score reflects exactly one trial move.  The best-scoring candidate
    is returned (the first one on ties, as ``np.argmax`` does).  When there
    are no candidates a random integer in ``[0, self.n - 1]`` is returned.
    """
    player, labels = obs
    candidates = model_api.get_candidates(player, labels, self.graph_mat)
    if not candidates:
        return random.randint(0, self.n - 1)

    values = []
    for c in candidates:
        # Score the board with only this candidate placed; the previous
        # version evaluated the same board once per candidate, so all
        # scores were identical and the first candidate always won.
        labels[c] = player
        values.append(
            self.get_value(player, labels, self.graph_mat, self.graph_dis)
        )
        labels[c] = 0
    return candidates[int(np.argmax(values))]
def min_max_search(self, depth, player, labels):
    """Depth-limited negamax search from ``player``'s point of view.

    Each candidate move is played on ``labels``, scored, and taken back.
    While ``depth < self.max_depth`` the score is the negated result of
    searching the reply of the other player (``3 - player``); at the depth
    limit it is ``self.get_value`` for the position after the move.

    Returns:
        ``(score, move)`` for the best candidate.  If ``player`` has no
        candidates the result is ``(-config.inf, None)``.  Whenever at
        least one candidate exists a move is returned, even if every
        candidate scores ``-config.inf``.

    NOTE(review): ``3 - player`` assumes players are numbered 1 and 2.
    """
    candidates = model_api.get_candidates(player, labels, self.graph_mat)
    if not candidates:
        return -config.inf, None

    max_score = -config.inf
    policy = None
    for c in candidates:
        labels[c] = player
        if depth < self.max_depth:
            score, _ = self.min_max_search(depth + 1, 3 - player, labels)
            score = -score
        else:
            score = self.get_value(player, labels, self.graph_mat, self.graph_dis)
        labels[c] = 0
        # Accept the first candidate unconditionally so that a position in
        # which every move scores -inf still yields a legal move, not None.
        if policy is None or score > max_score:
            max_score = score
            policy = c
    return max_score, policy
def ab_search(self, depth, player, labels, history_max):
    """Depth-limited negamax search with cutoffs, from ``player``'s view.

    Candidates are shuffled in place and then tried one by one: each is
    played on ``labels``, scored, and taken back.  While
    ``depth < self.max_depth`` the score is the negated result of searching
    the reply of the other player (``3 - player``); at the depth limit it
    is ``self.get_value`` for the position after the move.

    Args:
        history_max: cutoff bound expressed from ``player``'s perspective.
            As soon as the best score found reaches it the search of this
            node stops, because the caller cannot prefer this line over the
            one it already has.

    Returns:
        ``(score, move)``.  ``(-config.inf, None)`` if there are no
        candidates; otherwise a move is always returned.  After a cutoff
        the score is only a lower bound on the node's true value.

    NOTE(review): ``3 - player`` assumes players are numbered 1 and 2.
    """
    candidates = model_api.get_candidates(player, labels, self.graph_mat)
    if not candidates:
        return -config.inf, None

    max_score = -config.inf
    policy = None
    random.shuffle(candidates)
    for c in candidates:
        labels[c] = player
        if depth < self.max_depth:
            # The reply only matters if it is worth less than -max_score to
            # the opponent.  ``history_max`` must not be mixed into this
            # bound: it is measured from this player's perspective, and
            # using it one ply down cuts off lines that are still relevant.
            score, _ = self.ab_search(depth + 1, 3 - player, labels, -max_score)
            score = -score
        else:
            score = self.get_value(player, labels, self.graph_mat, self.graph_dis)
        labels[c] = 0
        # Accept the first candidate unconditionally so that a position in
        # which every move scores -inf still yields a legal move, not None.
        if policy is None or score > max_score:
            max_score = score
            policy = c
        if max_score >= history_max:
            return max_score, policy
    return max_score, policy