from util import gomoku_util
# GomokuState, Board, AI, and BOARD_SIZE are assumed to come from the local
# gomoku and ai modules used by the companion evaluation script below.
from gomoku import GomokuState, Board
from ai import AI, BOARD_SIZE

player_color = 'black'

# default 'beginner' level opponent policy 'random'
# env = GomokuEnv(player_color=player_color,
#                 opponent='medium', board_size=BOARD_SIZE)
# env.reset()


def get_oppo_color(color):
    """Return the opposing stone color."""
    return 'black' if color == 'white' else 'white'


ai = AI(player_color)
ai.load()
oppo = AI(get_oppo_color(player_color))
oppo.load('policy_v3.h5')

for j in range(1):
    total_win = 0.
    total_draw = 0.
    total_lose = 0.
    TRAIN_ROUND = 10
    for i in range(TRAIN_ROUND):
        done = False
        state = GomokuState(Board(BOARD_SIZE), gomoku_util.BLACK)
        turn = 'black'
        steps = 0
        prev_action = None
import gym
from gomoku import GomokuEnv
from ai import AI, BOARD_SIZE
from time import sleep

player_color = 'black'

# default 'beginner' level opponent policy 'random'
env = GomokuEnv(player_color=player_color,
                opponent='beginner', board_size=BOARD_SIZE)

ai = AI(player_color)
ai.load()

total_win = total_draw = total_lose = 0
TRAIN_ROUND = 100
for i in range(TRAIN_ROUND):
    done = False
    env.reset()
    try:
        ai.reset()
        while not done:
            action = ai.play(env.state.board.board_state,
                             env.state.board.last_action)
            # print('prev action', env.state.board.last_action)
            observation, reward, done, info = env.step(action)
            # sleep(3)
            if done:
                env.render(mode='human')
                print('round', i, "Game is Over", reward)
                # tally the outcome: positive reward is a win, zero a draw
                if reward > 0:
                    total_win += 1
                elif reward == 0:
                    total_draw += 1
                else:
                    total_lose += 1
    except Exception as e:
        # treat a round that raises (e.g. an illegal move) as a loss
        # and keep evaluating
        print('round', i, 'failed:', e)
        total_lose += 1

print('win', total_win, 'draw', total_draw, 'lose', total_lose)
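# The two Gomoku scripts above assume an `ai` module exposing an AI class
# with load/reset/play methods. That module is not shown here; the stub
# below is a hypothetical sketch of the interface the scripts call, not the
# real model.
class AIStub(object):
    def __init__(self, color):
        self.color = color

    def load(self, path=None):
        # would restore policy weights, e.g. from 'policy_v3.h5'
        pass

    def reset(self):
        # would clear any per-game state before a new round
        pass

    def play(self, board_state, last_action):
        # would return a move index given the raw board and the
        # opponent's last move
        raise NotImplementedError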
import copy
import json
import random

import networkx as nx

from board import Board  # assumed local module providing the rules engine
from ai import AI        # assumed local wrapper around opponent executables

# `exec_file` (the path prefix of the opponent AI executables) is expected
# to be defined elsewhere in the project before Quoridor is instantiated.


class Quoridor(object):
    """Self-play wrapper around a Quoridor board and a pool of opponent AIs."""

    def __init__(self, pros):
        self.pros = pros  # process index, used to name the result file
        self.size = 19    # grid where odd cells hold pawns and even cells walls
        self.ai_num = 5   # number of opponent AI executables to sample from
        self.ai = ''

    def step_ai(self):
        """Let the opponent move; returns 2 while the game is still running."""
        if self.ai_id == 0:
            # built-in opponent: follow the shortest path to the goal row
            dis, path = self.findPath(1)
            x, y = path
            return self.step(x, y, 1)
        else:
            suc = self.ai.run(self.nw, self.ins)
            if type(suc) != str:
                # non-string result: the game ended on the opponent's side
                if suc:
                    return self.finish(1 - self.nw.result()
                                       if self.should_reverse
                                       else self.nw.result())
                else:
                    return self.finish(0)
            else:
                self.ins = suc
        self.steps += 1
        return 2

    def reset(self):
        """Start a new game against a randomly chosen opponent AI."""
        with open('record.txt', 'w') as f:
            f.write("Init\n")
        if self.ai != '' and type(self.ai.ai) is not dict:
            self.ai.ai.exit()
        self.ai_id = random.randint(1, self.ai_num)
        self.err = ['', '']
        self.record_json = {}
        self.running = True
        self.steps = 0
        self.nw = Board(self.record_json)
        self.json_out = open('result' + str(self.pros) + '.json', 'w')
        # randomly decide who moves first; reversing mirrors the board view
        self.should_reverse = turn = random.randint(0, 1)
        # self.should_reverse = turn = 0
        self.record_json['id'] = [turn, 1 - turn]
        self.record_json['step'] = []
        self.ins = ""
        opp_state = self.state(1)
        if self.ai_id == 0:
            self.record_json['user'] = ["training", "short_path"]
        else:
            self.ai = AI(exec_file + str(self.ai_id), 1, self.pros)
            suc = self.ai.load()
            if not suc:
                print("fail")
                return (copy.deepcopy(self.state(0)),
                        copy.deepcopy(self.finish(0)),
                        copy.deepcopy(opp_state),
                        copy.deepcopy(-1),
                        copy.deepcopy(self.ai_id))
            suc = self.ai.init(1 - turn)
            if not suc:
                print("fail2")
                return (copy.deepcopy(self.state(0)),
                        copy.deepcopy(self.finish(0)),
                        copy.deepcopy(opp_state),
                        copy.deepcopy(-1),
                        copy.deepcopy(self.ai_id))
            self.record_json['user'] = ["training", self.ai.name]
        if turn == 1:
            # the opponent opens the game
            result = self.step_ai()
            if result != 2:
                k = -1
            else:
                xx, yy = map(int, self.ins.split())
                k = self.change_from_loc(xx, yy)
        else:
            result = self.nw.result()
            k = -1
        return (copy.deepcopy(self.state(0)), copy.deepcopy(result),
                copy.deepcopy(opp_state), copy.deepcopy(k),
                copy.deepcopy(self.ai_id))

    def state(self, id):
        """Render the board from player `id`'s point of view as a 19x19 grid."""
        length = 20
        state = []
        side = id ^ self.should_reverse
        if side == 0:
            for i in range(1, length):
                state.append([])
                for j in range(1, length):
                    state[i - 1].append(1 if self.nw.board[i][j] == True else 0)
            state[self.nw.loc[side][0] - 1][self.nw.loc[side][1] - 1] = 2
            state[self.nw.loc[side ^ 1][0] - 1][
                self.nw.loc[side ^ 1][1] - 1] = -2
        else:
            # mirror the rows so each player sees itself moving "down"
            for i in range(1, length):
                state.append([])
                for j in range(1, length):
                    state[i - 1].append(
                        1 if self.nw.board[length - i][j] == True else 0)
            state[length - self.nw.loc[side][0] - 1][
                self.nw.loc[side][1] - 1] = 2
            state[length - self.nw.loc[side ^ 1][0] - 1][
                self.nw.loc[side ^ 1][1] - 1] = -2
        return state

    def step(self, x, y, ai):
        """Apply player `ai`'s move at grid cell (x, y); 2 means game goes on."""
        side = ai ^ self.should_reverse
        if side == 0:
            self.ins = str(x + 1) + ' ' + str(y + 1)
            res = self.nw.update([x + 1, y + 1])
        else:
            # translate from the mirrored view back to board coordinates
            if x % 2 == 1 and y % 2 == 0:
                self.ins = str(17 - x) + ' ' + str(y + 1)
                res = self.nw.update([17 - x, y + 1])
            else:
                self.ins = str(19 - x) + ' ' + str(y + 1)
                res = self.nw.update([19 - x, y + 1])
        self.steps += 1
        if res != True:
            # illegal move: the offender forfeits
            self.err[ai] = res
            return self.finish(ai ^ 1)
        if self.nw.result() < 2:
            ans = (1 - self.nw.result() if self.should_reverse
                   else self.nw.result())
            return self.finish(ans)
        return 2

    def finish(self, winner):
        """Close the game, dump the record to JSON, and return the winner."""
        self.record_json['total'] = self.steps
        self.record_json['result'] = winner
        self.record_json['err'] = [self.err, ' ']
        if self.ai != '' and type(self.ai.ai) is not dict:
            self.ai.ai.exit()
        with open('result' + str(self.pros) + '.json', 'w') as json_out:
            json.dump(self.record_json, json_out)
        self.running = False
        return winner

    def build_graph(self, ai):
        """Build movement graphs: G honors walls and pawn jumps, _G ignores
        the opponent pawn (used as a fallback when G has no route)."""
        state = copy.deepcopy(self.state(ai))
        u = ((0, 1), (0, -1), (1, 0), (-1, 0))
        G = nx.DiGraph()
        _G = nx.DiGraph()
        for i in range(1, self.size, 2):
            for j in range(1, self.size, 2):
                if state[i][j] == 2:
                    pos = (i, j)    # our pawn
                if state[i][j] == -2:
                    _pos = (i, j)   # opponent pawn
                G.add_node((i, j))
                _G.add_node((i, j))
        for i in range(1, self.size, 2):
            for j in range(1, self.size, 2):
                for k in range(len(u)):
                    if state[i + u[k][0]][j + u[k][1]]:
                        continue  # a wall blocks this direction
                    x = i + u[k][0] * 2
                    y = j + u[k][1] * 2
                    _G.add_weighted_edges_from([((i, j), (x, y), 1)])
                    if state[x][y] == 0 or state[x][y] == 2:
                        G.add_weighted_edges_from([((i, j), (x, y), 1)])
                    else:
                        # opponent pawn ahead: jump sideways if a wall sits
                        # behind it, otherwise jump straight over it
                        if state[x + u[k][0]][y + u[k][1]]:
                            for kk in range((k < 2) * 2, (k < 2) * 2 + 2):
                                if state[x + u[kk][0]][y + u[kk][1]]:
                                    continue
                                G.add_weighted_edges_from([
                                    ((i, j),
                                     (x + u[kk][0] * 2, y + u[kk][1] * 2), 1)
                                ])
                        else:
                            G.add_weighted_edges_from([
                                ((i, j),
                                 (x + u[k][0] * 2, y + u[k][1] * 2), 1)
                            ])
        return G, pos, _G, _pos

    def findPath(self, ai):
        """Shortest distance and first step toward the goal row for `ai`."""
        G, pos, _G, _pos = self.build_graph(ai)
        path = nx.shortest_path(G, source=pos)
        dis = nx.shortest_path_length(G, source=pos)
        min_dis = 1000
        min_path = ()
        for i in range(1, self.size, 2):
            now_dis = dis.get((17, i), 1001)
            # break ties randomly so play does not become deterministic
            if now_dis < min_dis or (now_dis == min_dis
                                     and random.randint(0, 4) == 0):
                min_dis = now_dis
                min_path = path[(17, i)][1]
        if min_path == ():
            # no route in G: fall back to _G, which ignores the opponent pawn
            _path = nx.shortest_path(_G, source=pos)
            _dis = nx.shortest_path_length(_G, source=pos)
            for i in range(1, self.size, 2):
                now_dis = _dis.get((17, i), 1000)
                if now_dis < min_dis:
                    min_dis = now_dis
            if _dis[_pos] == 1:
                # the opponent is adjacent and blocking: step to any cell
                # reachable in one move
                for i in range(1, self.size, 2):
                    for j in range(1, self.size, 2):
                        if dis.get((i, j), 1000) == 1:
                            min_path = (i, j)
            else:
                min_path = _path[_pos][1]
        return copy.deepcopy(min_dis), copy.deepcopy(min_path)

    def wall_pos(self, kind):
        """Map a wall action index (0-127) to its grid coordinates."""
        row = kind // 8
        col = kind % 8
        if row % 2 == 0:
            return row + 1, col * 2 + 2
        else:
            return row + 1, col * 2 + 1

    def change_from_loc(self, x, y):
        """Map a board move back to an action index (128 = pawn move)."""
        if x % 2 == 0 and y % 2 == 0:
            return 128
        if self.should_reverse ^ 1 == 0:
            return (x - 2) * 8 + (y // 2 - 1)
        else:
            if x % 2 == 0:
                return (16 - x) * 8 + (y // 2 - 1)
            else:
                return (18 - x) * 8 + (y // 2 - 1)

    def action(self, kind):
        """Apply the training agent's action, then let the opponent reply."""
        if kind < 128:
            x, y = self.wall_pos(kind)
        else:
            # action 128: move the pawn one step along the shortest path
            dis, path = self.findPath(0)
            x, y = path
        result = self.step(x, y, 0)
        opp_state = self.state(1)
        if result != 2:
            return (copy.deepcopy(self.state(0)), copy.deepcopy(result),
                    copy.deepcopy(opp_state), copy.deepcopy(-1))
        result = self.step_ai()
        xx, yy = map(int, self.ins.split())
        k = self.change_from_loc(xx, yy) if result == 2 else -1
        return (copy.deepcopy(self.state(0)), copy.deepcopy(result),
                copy.deepcopy(opp_state), copy.deepcopy(k))
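# Hypothetical driver showing how the Quoridor wrapper above is meant to be
# consumed: reset() yields the initial state and opponent id, then
# action(kind) is called with a move index (0-127 = wall placements,
# 128 = shortest-path pawn move) until the result is no longer 2 (game still
# running). `choose_action` is a placeholder for a real policy, not part of
# the original code.
def run_one_game(pros=0):
    game = Quoridor(pros)
    state, result, opp_state, k, ai_id = game.reset()
    while result == 2:
        kind = choose_action(state)  # placeholder policy, not defined here
        state, result, opp_state, k = game.action(kind)
    return result  # winner index as recorded by finish()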
import os
import logging

import gym
import numpy as np

from ai import AI  # assumed local module providing the DQN agent

logger = logging.getLogger("Acrobot")
LOG_FORMAT = "%(levelname)s %(asctime)s - %(message)s"
logging.basicConfig(filename="Acrobot.log", level=logging.DEBUG,
                    format=LOG_FORMAT)

env = gym.make('Acrobot-v1')
input_shape = (args.ModelInputShape[0], args.ModelInputShape[1])
action_size = env.action_space.n
batch_size = args.BatchSize
Model_Name = "Acrobot-dqn.h5"

agent = AI(action_size, input_shape, batch_size)
# resume from saved weights if a checkpoint exists in the working directory
if Model_Name in os.listdir():
    agent.load(Model_Name)

Epochs = args.Epochs
temp = []

# play one episode greedily and report the accumulated reward
done = False
state = env.reset()
actions = []
tot_reward = 0
while not done:
    env.render()
    state = np.reshape(state, [1, input_shape[0], input_shape[1]])
    Q = agent.act(state)
    actions.append(Q)
    state, reward, done, info = env.step(Q)
    tot_reward += reward
print('Game ended! Total reward: {}'.format(tot_reward))
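# The evaluation script above reads its configuration from an `args` object
# that is not defined in this file. A minimal argparse setup along these
# lines would provide it; the flag names mirror the attributes the script
# uses (ModelInputShape, BatchSize, Epochs), but the defaults here are
# guesses (Acrobot-v1 observations are 6-dimensional).
import argparse

parser = argparse.ArgumentParser(description='Acrobot DQN evaluation')
parser.add_argument('--ModelInputShape', type=int, nargs=2, default=[1, 6],
                    help='two ints: the (rows, cols) shape fed to the model')
parser.add_argument('--BatchSize', type=int, default=32)
parser.add_argument('--Epochs', type=int, default=1000)
args = parser.parse_args()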