def play_game(agent1, agent2, k, mod, deb):
    """Play one complete match between two agents and report the outcome.

    Both agents are re-initialised, then moves alternate (``agent1`` first)
    from the state ``(1, 1, 1, 1)``. Play stops when ``end`` reports a
    finished position, when the side to move has no available action, or
    once ``k * k * k`` moves have been made.

    Args:
        agent1: First player; must offer ``reinit()`` and
            ``next_step(own_l, own_r, other_l, other_r, action_list)``.
        agent2: Second player, same interface as ``agent1``.
        k: Game parameter handed to the move helpers; also fixes the move cap.
        mod: Rule variant handed to ``generate_available_action``.
        deb: Value stored in the module-level ``debug_`` flag.

    Returns:
        A ``(result, step)`` tuple. ``result`` is 0 for a draw (move cap
        reached), 1 when agent 1 wins and 2 when agent 2 wins; ``step`` is
        the number of moves played.
    """
    global debug_
    debug_ = deb

    debug("Game begin")
    debug("Agent1 initializing...")
    agent1.reinit()
    debug("Agent1 init done")
    debug("Agent2 initializing...")
    agent2.reinit()
    debug("Agent2 init done")

    l1, r1, l2, r2 = 1, 1, 1, 1
    turn = 0
    step = 0
    step_limit = k * k * k
    visited = set()  # every (state, turn) seen so far, passed to the move generator

    while True:
        if end(l1, r1, l2, r2) or step >= step_limit:
            break
        visited.add((l1, r1, l2, r2, turn))
        moves = generate_available_action(k, l1, r1, l2, r2, turn, visited, mod)
        if len(moves) == 0:
            break

        # Each agent is shown the position from its own side, so the move is
        # applied with that agent's values first and turn fixed to 0.
        if turn == 0:
            choice = agent1.next_step(l1, r1, l2, r2, moves)
            op1, op2 = moves[choice][0], moves[choice][1]
            l1, r1, l2, r2, _ = generate_from_action(k, l1, r1, l2, r2, 0,
                                                     op1, op2)
        else:
            choice = agent2.next_step(l2, r2, l1, r1, moves)
            op1, op2 = moves[choice][0], moves[choice][1]
            l2, r2, l1, r1, _ = generate_from_action(k, l2, r2, l1, r1, 0,
                                                     op1, op2)

        step += 1
        turn = 1 - turn
        # ``turn`` has already been flipped, so ``2 - turn`` is the mover.
        debug(
            "Step %d, Player %d use action (%s,%s), get state (%d,%d,%d,%d)"
            % (step, 2 - turn, ma[op1], ma[op2], l1, r1, l2, r2))

    # NOTE(review): the move cap is tested before the winner, so a game that
    # is decided exactly on the last permitted move is reported as a draw --
    # confirm this is the intended rule.
    if step >= step_limit:
        result, verdict = 0, "Draw"
    elif turn == 0:
        # Player 1 is to move in a finished or stuck position.
        result, verdict = 2, "Agent 2 wins"
    else:
        result, verdict = 1, "Agent 1 wins"
    debug(verdict)
    return result, step
def action():
    """Apply the move submitted in the request and redirect to the game page.

    Query parameters read from ``request.args``:
        op: Integer encoding the move; ``op // 2`` and ``op % 2`` are the two
            action components passed to ``generate_from_action``.
        gameid: Key of the game in the module-level ``game_list``.
        state: Text of a 5-tuple ``(l1, r1, l2, r2, turn)``; parentheses and
            spaces are stripped before splitting on commas.

    Returns:
        A redirect to the ``human_agent`` endpoint. If the newest history
        entry starts with a negative value, nothing is applied and the
        redirect points at the current history length.
    """
    op1, op2 = divmod(int(request.args["op"]), 2)
    game_id = request.args["gameid"]
    history = game_list[game_id]["history"]

    # A negative leading value in the newest entry means no move is accepted.
    if history[-1][0] < 0:
        return redirect(
            url_for("human_agent", gameid=game_id, step=len(history)))

    # NOTE(review): the position is taken from the request rather than from
    # the stored history -- confirm that trusting the client here is intended.
    raw_state = request.args["state"]
    for unwanted in ("(", ")", " "):
        raw_state = raw_state.replace(unwanted, "")
    fields = raw_state.split(",")
    l1, r1, l2, r2, turn = [int(fields[i]) for i in range(5)]

    l1, r1, l2, r2, turn = generate_from_action(k, l1, r1, l2, r2, turn,
                                                op1, op2)

    # The step to display is computed before this move is recorded.
    step = len(history) + 2
    history.append((l1, r1, l2, r2, turn))
    return redirect(url_for("human_agent", gameid=game_id, step=step))
def simulate(self, p):
    """Play uniformly random moves from node ``p`` and score the rollout.

    The rollout starts at ``p.state`` with depth ``p.depth`` and continues
    until ``end`` reports a finished position, the side to move has no
    available action, or the depth exceeds ``self.k ** 2``. A private copy of
    ``self.se`` is handed to the move generator; when ``self.mod == 2`` every
    state visited during the rollout is added to that copy.

    Args:
        p: Node exposing ``state`` (``(l1, r1, l2, r2, turn)``) and ``depth``.

    Returns:
        1 if the rollout stopped only because of the depth cap. Otherwise 0
        when the side to move at the end equals ``p.state[4]``, and 2 when it
        does not.
    """
    visited = set(self.se)
    if self.mod == 2:
        visited.add(p.state)

    l1, r1, l2, r2, turn = p.state
    depth = p.depth
    stuck = False

    while True:
        if end(l1, r1, l2, r2) or depth > self.k ** 2:
            break
        moves = generate_available_action(self.k, l1, r1, l2, r2, turn,
                                          visited, self.mod)
        if len(moves) == 0:
            # The side to move cannot act; handled like a finished game.
            stuck = True
            break
        op1, op2 = moves[random.randint(0, len(moves) - 1)]
        l1, r1, l2, r2, turn = generate_from_action(self.k, l1, r1, l2, r2,
                                                    turn, op1, op2)
        depth += 1
        if self.mod == 2:
            visited.add((l1, r1, l2, r2, turn))

    if not (end(l1, r1, l2, r2) or stuck):
        return 1
    return 0 if turn == p.state[4] else 2
def generate_next(self, l1, r1, l2, r2, turn):
    """Record every successor of a state in ``self.next`` and ``self.pre``.

    For each action returned by ``generate_available_action`` (called with an
    empty visited set), the resulting state is appended to the successor list
    of the given state in ``self.next``, and the given state is appended to
    the predecessor list of that successor in ``self.pre``.

    Args:
        l1, r1, l2, r2, turn: Components of the state to expand.
    """
    origin = (l1, r1, l2, r2, turn)
    moves = generate_available_action(self.k, l1, r1, l2, r2, turn, set(),
                                      self.mod)
    for op1, op2 in moves:
        successor = generate_from_action(self.k, l1, r1, l2, r2, turn,
                                         op1, op2)
        # Both containers are indexed inside the loop so that no entry is
        # touched when there are no moves.
        self.next[origin].append(successor)
        self.pre[successor].append(origin)
def next_step(self, l1, r1, l2, r2, action_list):
    """Choose the index of the action to play from ``action_list``.

    When ``self.mod == 2`` the decision is delegated to ``self.agent``.
    Otherwise each action is applied with ``generate_from_action``, the
    resulting state is normalised with ``self.reform_state`` and looked up in
    ``self.f``. Actions are grouped by that value and one is picked at random
    from the first non-empty group, in this order: ``False``, ``None``, then
    anything else.

    Args:
        l1, r1, l2, r2: Current position, seen from the side to move.
        action_list: Sequence of ``(op1, op2)`` pairs to choose from.

    Returns:
        An index into ``action_list``.
    """
    if self.mod == 2:
        return self.agent.next_step(l1, r1, l2, r2, action_list)

    # The result is unused; the call is kept so behaviour stays identical.
    s = self.reform_state(l1, r1, l2, r2, 0)

    to_false, to_none, to_other = [], [], []
    for idx, (op1, op2) in enumerate(action_list):
        ns = generate_from_action(self.k, l1, r1, l2, r2, 0, op1, op2)
        key = self.reform_state(ns[0], ns[1], ns[2], ns[3], ns[4])
        value = self.f[key]
        if value is None:
            to_none.append(idx)
        elif value is False:
            to_false.append(idx)
        else:
            to_other.append(idx)

    # Presumably ``False`` marks a position lost for the opponent -- confirm.
    for bucket in (to_false, to_none):
        if len(bucket) > 0:
            return bucket[random.randint(0, len(bucket) - 1)]
    return to_other[random.randint(0, len(to_other) - 1)]
def next_step(self, l1, r1, l2, r2, action_list):
    """Choose the index of the action to play from ``action_list``.

    Each action is applied with ``generate_from_action`` and the resulting
    state is looked up in ``self.f``. Actions are grouped and one is picked
    at random from the first non-empty group, in this order:

    1. successors whose ``self.f`` value is ``False``;
    2. successors whose value is ``None`` and whose ``self.next_cnt`` entry
       has a truthy first element;
    3. the remaining successors whose value is ``None``;
    4. all other successors.

    Args:
        l1, r1, l2, r2: Current position, seen from the side to move.
        action_list: Sequence of ``(op1, op2)`` pairs to choose from.

    Returns:
        An index into ``action_list``.
    """
    to_false = []
    none_preferred = []
    none_other = []
    to_other = []

    for idx, (op1, op2) in enumerate(action_list):
        successor = generate_from_action(self.k, l1, r1, l2, r2, 0, op1, op2)
        value = self.f[successor]
        if value is None:
            if successor in self.next_cnt and self.next_cnt[successor][0]:
                none_preferred.append(idx)
            else:
                none_other.append(idx)
        elif value is False:
            to_false.append(idx)
        else:
            to_other.append(idx)

    for bucket in (to_false, none_preferred, none_other):
        if len(bucket) > 0:
            return bucket[random.randint(0, len(bucket) - 1)]
    return to_other[random.randint(0, len(to_other) - 1)]
def expand(self, p):
    """Create the children of node ``p`` and run one rollout from it.

    ``p.next`` is replaced by one ``Node`` per available action, each at depth
    ``p.depth + 1``. The node is then scored with ``self.simulate`` and its
    counters are updated: ``p.n`` grows by one and ``p.v`` by the result.

    Args:
        p: Node exposing ``state``, ``depth``, ``next``, ``n`` and ``v``.

    Returns:
        The value returned by ``self.simulate(p)``.
    """
    l1, r1, l2, r2, turn = p.state
    moves = generate_available_action(self.k, l1, r1, l2, r2, turn, self.se,
                                      self.mod)
    p.next = [
        Node(
            generate_from_action(self.k, l1, r1, l2, r2, turn, op1, op2),
            p.depth + 1,
        )
        for op1, op2 in moves
    ]

    outcome = self.simulate(p)
    p.n += 1
    p.v += outcome
    return outcome
def check(self, l1, r1, l2, r2, turn, action_list):
    """Verify that the root's children line up with ``action_list``.

    For every position ``i``, applying ``action_list[i]`` to the given state
    must produce exactly ``self.root.next[i].state``.

    Args:
        l1, r1, l2, r2, turn: Components of the state the actions apply to.
        action_list: Sequence of actions; elements 0 and 1 of each are used.

    Raises:
        NotImplementedError: If any resulting state differs from the state
            stored in the child at the same index.
    """
    for idx, move in enumerate(action_list):
        expected = generate_from_action(self.k, l1, r1, l2, r2, turn,
                                        move[0], move[1])
        if expected != self.root.next[idx].state:
            raise NotImplementedError
def gamer(game_id, fb, method, mod):
    """Drive the machine side of a human-vs-machine game and log it to a file.

    The function polls ``game_list[game_id]["history"]`` once per second.
    Whenever the parity of the history length matches
    ``game_list[game_id]["turn"]`` it reads the newest state, lets the agent
    choose a move, and appends the resulting state to the history. A log is
    written to ``<RESULT_DIR>/<game_id>.txt``.

    History entries whose first value is negative act as control markers:

    * read here: ``-4`` (human concedes) and ``-2`` / ``-7`` (human loses);
    * appended here: ``-6`` (human did not move within the wait limit),
      ``-1`` (machine loses), ``-2`` (human loses), ``-3`` (draw).

    Args:
        game_id: Key of the game in ``game_list``; also the log file stem.
        fb: ``0`` logs the header "human vs <method>", anything else logs
            "<method> vs human".
        method: Agent name passed to ``generate_agent``.
        mod: Rule variant passed to the agent and to
            ``generate_available_action``.
    """
    step_limit = 75        # history length at which the game is a draw
    max_wait_second = 100  # consecutive one-second polls granted to the human

    result_path = os.path.join(app.config["RESULT_DIR"], game_id + ".txt")
    # The context manager closes the log even when the agent or a game helper
    # raises; previously the handle leaked on every error path.
    with open(result_path, "w") as f:
        if fb == 0:
            print("human vs %s" % method, file=f)
        else:
            print("%s vs human" % method, file=f)
        print("Mod %d" % mod, file=f)

        agent = generate_agent(method)(k, mod)
        wait_second = 0
        while True:
            game = game_list[game_id]
            history = game["history"]

            # Not the machine's turn yet: wait for the human, up to the limit.
            if len(history) % 2 != game["turn"]:
                wait_second += 1
                if wait_second > max_wait_second:
                    history.append((-6, -6, -6, -6, -6))
                    print("human leaves, game ends", file=f)
                    break
                time.sleep(1)
                continue

            print("from machine", history)
            state = history[-1]
            wait_second = 0

            if state[0] == -4:
                print("human grant lose, game ends", file=f)
                break
            if state[0] in (-2, -7):
                print("human loses, game ends", file=f)
                break

            seen = game["set"]
            seen.add(state)
            l1, r1, l2, r2, turn = state
            print("%d,%d,%d,%d" % (l1, r1, l2, r2), file=f)

            action_list = generate_available_action(k, l1, r1, l2, r2, turn,
                                                    seen, mod)
            if len(action_list) == 0 or end(l1, r1, l2, r2):
                history.append((-1, -1, -1, -1, -1))
                print("machine loses,game ends", file=f)
                break

            # The agent is shown the position with its own values first; the
            # move itself is applied to the state in its stored orientation.
            if turn == 0:
                action = agent.next_step(l1, r1, l2, r2, action_list)
            else:
                action = agent.next_step(l2, r2, l1, r1, action_list)
            l1, r1, l2, r2, turn = generate_from_action(
                k, l1, r1, l2, r2, turn,
                action_list[action][0], action_list[action][1])

            print("%d,%d,%d,%d" % (l1, r1, l2, r2), file=f)
            new_state = (l1, r1, l2, r2, turn)
            seen.add(new_state)
            history.append(new_state)

            # A finished position takes precedence over the draw limit.
            if end(l1, r1, l2, r2):
                history.append((-2, -2, -2, -2, -2))
                print("human loses,game ends", file=f)
                break
            if len(history) >= step_limit:
                history.append((-3, -3, -3, -3, -3))
                print("draw, game ends", file=f)
                break

    print("Game %s ends" % game_id)