def run():
    """Play an interactive human-vs-AI Gomoku game on an 8x8 board.

    Loads the pickled policy parameters from ``best_policy_8_8_5.model``,
    wraps them in the pure-numpy policy-value network, builds an MCTS player
    on top of it and starts a game against a ``Human`` player.  Pressing
    Ctrl-C (``KeyboardInterrupt``) ends the session with a short message.
    """
    n = 5
    width, height = 8, 8
    model_file = 'best_policy_8_8_5.model'
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)

        # ############### human VS AI ###################
        # Alternative: MCTS player with the policy_value_net trained by the
        # AlphaZero algorithm:
        # policy_param = pickle.load(open(model_file, 'rb'))
        # best_policy = PolicyValueNet(width, height, net_params = policy_param)
        # mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

        # MCTS player with the trained policy_value_net written in pure numpy.
        # The context managers guarantee the model file is closed again.
        try:
            with open(model_file, 'rb') as model_fh:
                policy_param = pickle.load(model_fh)
        except UnicodeDecodeError:
            # Python 3 cannot decode the 8-bit strings of a Python 2 pickle
            # with the default ASCII codec; retry while keeping raw bytes.
            with open(model_file, 'rb') as model_fh:
                policy_param = pickle.load(model_fh, encoding='bytes')
        best_policy = PolicyValueNetNumpy(width, height, policy_param)
        # set larger n_playout for better performance
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5, n_playout=400)

        # uncomment the following line to play with pure MCTS
        # (it's much weaker even with a larger n_playout)
        # mcts_player = MCTS_Pure(c_puct=5, n_playout=1000)

        # human player, input your move in the format: 2,3
        human = Human()

        # set start_player=0 for human first
        game.start_play(human, mcts_player, start_player=1, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
def run():
    """Play an interactive human-vs-AI Gomoku game on a 15x15 board.

    Loads the pickled policy parameters from ``best_policy.model`` into the
    pure-numpy policy-value network, builds an MCTS player on top of it and
    starts a game against a ``Human`` player.  Pressing Ctrl-C
    (``KeyboardInterrupt``) ends the session with a short message.
    """
    n = 5
    width, height = 15, 15
    model_file = 'best_policy.model'
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)

        # ############### human VS AI ###################
        # Alternative: load the trained policy_value_net in either
        # Theano/Lasagne, PyTorch or TensorFlow:
        # best_policy = PolicyValueNet(width, height, model_file = model_file)
        # mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

        # Load the provided model (trained in Theano/Lasagne) into a MCTS
        # player written in pure numpy.  The context managers guarantee the
        # model file is closed again.
        try:
            with open(model_file, 'rb') as model_fh:
                policy_param = pickle.load(model_fh)
        except UnicodeDecodeError:
            # Python 3 cannot decode the 8-bit strings of a Python 2 pickle
            # with the default ASCII codec; retry while keeping raw bytes.
            with open(model_file, 'rb') as model_fh:
                policy_param = pickle.load(model_fh, encoding='bytes')
        best_policy = PolicyValueNetNumpy(width, height, policy_param)
        # set larger n_playout for better performance
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5, n_playout=400)

        # uncomment the following line to play with pure MCTS
        # (it's much weaker even with a larger n_playout)
        # mcts_player = MCTS_Pure(c_puct=5, n_playout=1000)

        # human player, input your move in the format: 2,3
        human = Human()

        # set start_player=0 for human first
        game.start_play(human, mcts_player, start_player=1, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
def receive_gameSet():
    """Start a 9x9 game in which the AI moves first and return its opening.

    Reads the JSON request body, picks the model file matching the requested
    difficulty index (``hard_idx``), lets the MCTS player choose the first
    move on a fresh board and returns a JSON response with:

    * ``ai_moved``   -- the AI's move as a two-element list of ints,
    * ``states_loc`` -- a 9x9 grid of zeros with ``2`` at the AI's stone,
    * ``message``    -- always ``None`` here.
    """
    receive_data = request.get_json()
    print(receive_data)

    board = Board(width=9, height=9, n_in_row=5)
    board.init_board(1)

    # NOTE(review): hard_idx comes straight from the request and is used as a
    # list index without validation; an out-of-range value raises IndexError.
    hard_idx = receive_data['hard_idx']
    hards = [2500, 5000, 7500, 10000, 12500, 15000, 17500, 20000]
    model_file = f'./model/policy_9_{hards[hard_idx]}.model'

    # Context manager so the model file is closed after loading.
    with open(model_file, 'rb') as model_fh:
        policy_param = pickle.load(model_fh, encoding='bytes')
    best_policy = PolicyValueNetNumpy(9, 9, policy_param)
    mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                             c_puct=5, n_playout=400)

    # When the AI plays first, it places one stone up front.
    ai_move = mcts_player.get_action(board)
    ai_loc = board.move_to_location(ai_move)

    states_loc = [[0] * 9 for _ in range(9)]
    states_loc[ai_loc[0]][ai_loc[1]] = 2

    data = {
        'ai_moved': list(map(int, ai_loc)),
        'states_loc': states_loc,
        'message': None,
    }
    return jsonify(data)
def run():
    """Play an interactive human-vs-AI Gomoku game on a 9x9 board.

    Prompts on stdin for the difficulty (used to select the model file
    ``policy_9_<difficulty>.model``) and for the move order (0 = human
    first, 1 = AI first), then starts the game.

    Returns:
        The string ``"강제 종료"`` when the entered order is neither 0 nor 1;
        otherwise ``None`` after the game has finished.
    """
    n = 5
    width, height = 9, 9

    print("이 오목 인공지능은 9x9 환경에서 동작합니다.")
    print("현재 가능한 난이도(정책망의 학습 횟수) 목록 : [ 2500, 5000, 7500, 10000, 12500, 15000, 17500, 20000 ]")
    print("난이도를 입력하세요.")
    hard = int(input())
    model_file = f'./omok_AI/model/policy_9_{hard}.model'  # colab
    # model_file = f'./model/policy_9_{hard}.model'  # local

    print("자신이 선공(흑)인 경우에 0, 후공(백)인 경우에 1을 입력하세요.")
    order = int(input())
    if order not in [0, 1]:
        return "강제 종료"

    board = Board(width=width, height=height, n_in_row=n)
    game = Game(board)

    # Load the provided model to obtain the trained policy_value_net.
    # Context manager so the model file is closed after loading.
    with open(model_file, 'rb') as model_fh:
        policy_param = pickle.load(model_fh, encoding='bytes')
    best_policy = PolicyValueNetNumpy(width, height, policy_param)
    # n_playout controls playing strength
    mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                             c_puct=5, n_playout=400)

    human = Human()

    # start_player = 0 -> human first / 1 -> AI first
    game.start_play(human, mcts_player, start_player=order, is_shown=1)
def __init__(self):
    """Create an 8x8 five-in-a-row game between a human and an MCTS player.

    Loads the policy parameters from ``best_policy_8_8_5.model`` and stores
    the resulting ``Game`` on ``self.game``.
    """
    # Context manager so the model file is closed after loading.
    with open('best_policy_8_8_5.model', 'rb') as model_fh:
        policy_param = pickle.load(model_fh, encoding='bytes')
    best_policy = PolicyValueNetNumpy(8, 8, policy_param)
    mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                             c_puct=5, n_playout=400)
    human = Human()
    self.game = Game(Board(width=8, height=8, n_in_row=5), human, mcts_player)
def run(width=15, height=15):
    """Play a Gomoku game driven by a one-line instruction read from stdin.

    The instruction selects how the game starts:

    * ``BEGIN``        -- ``begin`` is 1 and ``first_move`` is 0.
    * ``TURN <x>,<y>`` -- ``begin`` is 0 and ``first_move`` is the board move
      for the given location.
    * anything else (including an empty line) -- ``begin`` and ``first_move``
      are both 0.

    Args:
        width: Board width.
        height: Board height.

    Pressing Ctrl-C (``KeyboardInterrupt``) ends the session with a short
    message.
    """
    n = 5
    model_file = 'current_policy.model'
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)

        instruction = input()
        # Split once; an empty line yields [] and falls through to the
        # default branch instead of raising IndexError on tokens[0].
        tokens = instruction.split()
        if instruction == "BEGIN":
            begin = 1
            first_move = 0
        elif tokens and tokens[0] == "TURN":
            first_location = [int(coord, 10) for coord in tokens[1].split(",")]
            first_move = board.location_to_move(first_location)
            begin = 0
        else:
            begin = 0
            first_move = 0

        # ############### human VS AI ###################
        # Alternative: MCTS player with the policy_value_net trained by the
        # AlphaZero algorithm:
        # policy_param = pickle.load(open(model_file, 'rb'))
        # best_policy = PolicyValueNet(width, height, net_params = policy_param)
        # mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

        # MCTS player with the trained policy_value_net written in pure numpy.
        # encoding='bytes' is needed to read the pickle under Python 3; the
        # context manager closes the model file after loading.
        with open(model_file, 'rb') as model_fh:
            policy_param = pickle.load(model_fh, encoding='bytes')
        best_policy = PolicyValueNetNumpy(width, height, policy_param)
        # set larger n_playout for better performance
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5, n_playout=1000)

        # uncomment the following line to play with pure MCTS
        # (it's much weaker even with a larger n_playout)
        # mcts_player = MCTS_Pure(c_puct=5, n_playout=10)

        # human player, input your move in the format: 2,3
        human1 = Human()

        # set start_player=0 for human first
        game.start_play(human1, mcts_player, begin, is_shown=1,
                        first_move=first_move)
    except KeyboardInterrupt:
        print('\n\rquit')
def __init__(self, root_state):
    """Create the search root and load the pretrained policy-value function.

    Args:
        root_state: State wrapped by the root ``Node``.

    The model file is chosen from ``GRID_LEN``: the 6x6 four-in-a-row model
    when ``GRID_LEN`` is 6, otherwise the 8x8 five-in-a-row model.  The
    network's ``policy_value_fn`` is stored on ``self.policy_value_fn``.
    """
    # NOTE(review): the meaning of the second Node argument (999999999) is
    # not visible from here -- confirm against Node's definition.
    self.root = Node(root_state, 999999999)

    # policy value function from pretrained model
    from gomoku_state import GRID_LEN
    from policy_value_net_numpy import PolicyValueNetNumpy
    import pickle

    if GRID_LEN == 6:
        model_file = 'best_policy_6_6_4.model'
    else:
        model_file = 'best_policy_8_8_5.model'

    # Context managers so the model file is closed after loading.
    try:
        with open(model_file, 'rb') as model_fh:
            policy_params = pickle.load(model_fh)
    except UnicodeDecodeError:
        # Python 3 cannot decode the 8-bit strings of a Python 2 pickle with
        # the default ASCII codec; retry while keeping raw bytes.
        with open(model_file, 'rb') as model_fh:
            policy_params = pickle.load(model_fh, encoding='bytes')

    nn = PolicyValueNetNumpy(GRID_LEN, GRID_LEN, policy_params)
    self.policy_value_fn = nn.policy_value_fn
def run():
    """Play an interactive human-vs-AI Gomoku game on a 10x10 board.

    Loads the pickled policy parameters from ``best_policy_10_10_5.model``
    into the pure-numpy policy-value network, builds an MCTS player on top
    of it and starts a game in which the human moves first.  Pressing Ctrl-C
    (``KeyboardInterrupt``) ends the session with a short message.
    """
    n = 5
    width, height = 10, 10
    model_file = 'best_policy_10_10_5.model'
    # model_file = 'best_policy_8_8_5.model'
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)

        # ############### human VS AI ###################
        # Alternative: load the trained policy_value_net in either
        # Theano/Lasagne, PyTorch or TensorFlow:
        # best_policy = PolicyValueNet(width, height, model_file = model_file)
        # mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

        # Load the provided model (trained in Theano/Lasagne) into a MCTS
        # player written in pure numpy.  The context managers guarantee the
        # model file is closed again.
        try:
            with open(model_file, 'rb') as model_fh:
                policy_param = pickle.load(model_fh)
        except UnicodeDecodeError:
            # Python 3 cannot decode the 8-bit strings of a Python 2 pickle
            # with the default ASCII codec; retry while keeping raw bytes.
            with open(model_file, 'rb') as model_fh:
                policy_param = pickle.load(model_fh, encoding='bytes')

        # Disabled: conversion of the loaded parameter list into a PyTorch
        # state dict.
        # keys = ['conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias', 'conv3.weight', 'conv3.bias'
        #     , 'act_conv1.weight', 'act_conv1.bias', 'act_fc1.weight', 'act_fc1.bias'
        #     , 'val_conv1.weight', 'val_conv1.bias', 'val_fc1.weight', 'val_fc1.bias', 'val_fc2.weight',
        #         'val_fc2.bias']
        # param_pytorch = OrderedDict()
        # for key, value in zip(keys, policy_param):
        #     if 'fc' in key and 'weight' in key:
        #         param_pytorch[key] = torch.FloatTensor(value.T)
        #     elif 'conv' in key and 'weight' in key:
        #         param_pytorch[key] = torch.FloatTensor(value[:, :, ::-1, ::-1].copy())
        #     else:
        #         param_pytorch[key] = torch.FloatTensor(value)

        best_policy = PolicyValueNetNumpy(width, height, policy_param)
        # set larger n_playout for better performance
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5, n_playout=400)

        # uncomment the following line to play with pure MCTS
        # (it's much weaker even with a larger n_playout)
        # mcts_player = MCTS_Pure(c_puct=5, n_playout=1000)

        # human player, input your move in the format: 2,3
        human = Human()

        # set start_player=0 for human first
        game.start_play(human, mcts_player, start_player=0, is_shown=1)
    except KeyboardInterrupt:
        print('\n\rquit')
def run():
    """Start the UI-driven Gomoku game on an 8x8 board.

    Builds three AI players (the policy-network MCTS player, a pure MCTS
    player and a quick roll-out player) so that any of the commented-out
    ``start_play_with_UI`` variants can be enabled; the active call runs the
    UI without passing an AI.  Pressing Ctrl-C (``KeyboardInterrupt``) ends
    the session with a short message.
    """
    n = 5
    width, height = 8, 8
    model_file = 'best_policy_8_8_5.model'
    try:
        board = Board(width=width, height=height, n_in_row=n)
        game = Game(board)

        # ############### human VS AI ###################
        # Alternative: load the trained policy_value_net in either
        # Theano/Lasagne, PyTorch or TensorFlow:
        # best_policy = PolicyValueNet(width, height, model_file = model_file)
        # mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

        # Load the provided model (trained in Theano/Lasagne) into a MCTS
        # player written in pure numpy.  The context managers guarantee the
        # model file is closed again.
        try:
            with open(model_file, 'rb') as model_fh:
                policy_param = pickle.load(model_fh)
        except UnicodeDecodeError:
            # Python 3 cannot decode the 8-bit strings of a Python 2 pickle
            # with the default ASCII codec; retry while keeping raw bytes.
            with open(model_file, 'rb') as model_fh:
                policy_param = pickle.load(model_fh, encoding='bytes')
        best_policy = PolicyValueNetNumpy(width, height, policy_param)
        # set larger n_playout for better performance
        mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                                 c_puct=5, n_playout=400)

        # pure mcts player
        # make quick_play=True to enable a weaker but much faster roll-out
        # player without mcts
        pure_mcts_player = MCTS_Pure(c_puct=1, n_playout=600, quick_play=False)
        roll_out_player = MCTS_Pure(quick_play=True)

        # 1. run with two human players
        game.start_play_with_UI()

        # 2. run with the AlphaZero neural network AI and the quick roll-out AI
        # game.start_play_with_UI(AI=mcts_player, AI2 = roll_out_player)

        # 3. run with the AlphaZero neural network AI and the pure MCTS AI
        # game.start_play_with_UI(AI=mcts_player, AI2 = pure_mcts_player)
    except KeyboardInterrupt:
        print('\n\rquit')
def get_mcts_player(player_index=1):
    """Build an MCTS player backed by the pretrained 8x8 policy network.

    Args:
        player_index: Player slot assigned via ``set_player_ind``.  An index
            of 1 corresponds to the first player (typically human) and an
            index of 2 to the second player (typically the AI opponent).

    Returns:
        The configured ``MCTSPlayer``.
    """
    # NOTE(review): this board is created and initialised but never used
    # below; kept in case construction has side effects -- confirm and drop.
    board = Board()
    board.init_board()

    size = 8
    model_file = '../AlphaZero_Gomoku/best_policy_8_8_5.model'

    # Context managers so the model file is closed after loading.
    try:
        with open(model_file, 'rb') as model_fh:
            policy_param = pickle.load(model_fh)
    except UnicodeDecodeError:
        # Python 3 cannot decode the 8-bit strings of a Python 2 pickle with
        # the default ASCII codec; retry while keeping raw bytes.
        with open(model_file, 'rb') as model_fh:
            policy_param = pickle.load(model_fh, encoding='bytes')

    best_policy = PolicyValueNetNumpy(size, size, policy_param)
    mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                             c_puct=5, n_playout=200)
    mcts_player.set_player_ind(player_index)
    return mcts_player
def parse_agent(agent_type, filename):
    """Create the player object named by ``agent_type``.

    Args:
        agent_type: One of ``'mcts_a0'``, ``'mcts_pure'``, ``'minmax'``,
            ``'dqn'`` or ``'human'``.  Any other value prints a warning and
            falls back to a human player.
        filename: Optional model file overriding the default for the
            ``'mcts_a0'`` and ``'dqn'`` agents; ignored when falsy.

    Returns:
        The constructed player.
    """
    if agent_type == 'mcts_a0':
        model_file = 'best_policy_8_8_5.model'
        if filename:
            model_file = filename

        # Alternative: load the trained policy_value_net in either
        # Theano/Lasagne, PyTorch or TensorFlow:
        # best_policy = PolicyValueNet(width, height, model_file = model_file)
        # mcts_player = MCTSPlayer(best_policy.policy_value_fn, c_puct=5, n_playout=400)

        # Load the provided model (trained in Theano/Lasagne) into a MCTS
        # player written in pure numpy.  The context managers guarantee the
        # model file is closed again.
        try:
            with open(model_file, 'rb') as model_fh:
                policy_param = pickle.load(model_fh)
        except UnicodeDecodeError:
            # Python 3 cannot decode the 8-bit strings of a Python 2 pickle
            # with the default ASCII codec; retry while keeping raw bytes.
            with open(model_file, 'rb') as model_fh:
                policy_param = pickle.load(model_fh, encoding='bytes')

        # NOTE(review): width and height are not defined in this function;
        # they must exist at module level or this raises NameError.
        best_policy = PolicyValueNetNumpy(width, height, policy_param)
        # set larger n_playout for better performance
        player = MCTSPlayer(best_policy.policy_value_fn,
                            c_puct=5, n_playout=400)
    elif agent_type == 'mcts_pure':
        player = MCTS_Pure(c_puct=5, n_playout=1000)
    elif agent_type == 'minmax':
        player = Minimax()
    elif agent_type == 'dqn':
        model_file = 'output/v_1/epoch_100/agent_2.pkl'
        if filename:
            model_file = filename
        player = DQNPlayer(model_file)
    elif agent_type == 'human':
        player = Human()
    else:
        player = Human()
        print('Illegal Agent Type. Defaulting to human player.')
    return player
def player_moved():
    """Apply the player's move on a 9x9 board and answer with the AI's move.

    Reads the JSON request body (``states_loc``, ``player_moved``,
    ``hard_idx``), rebuilds the board, plays the player's stone and checks
    for the end of the game.  If the game is over, the response carries no
    AI move.  Otherwise the MCTS player for the requested difficulty picks a
    move, it is played, and the response carries the move, the updated
    ``states_loc`` grid, the forbidden locations and an end-of-game message
    (``"tie"``, the winner, or ``None`` while the game continues).
    """
    receive_data = request.get_json()
    print(receive_data)

    board = Board(width=9, height=9, n_in_row=5)
    board.init_board(0)

    states_loc = receive_data['states_loc']
    if states_loc is not None:
        board.states_loc = states_loc
        board.states_loc_to_states()

    # Receive the location of the stone the player placed.
    player_loc = receive_data['player_moved']
    player_move = board.location_to_move(player_loc)
    board.do_move(player_move)
    board.set_forbidden()  # update the forbidden positions

    print(np.array(board.states_loc))
    print(board.states)

    # Win check (did the player win?)
    end, winner = board.game_end()
    if end:
        message = "tie" if winner == -1 else winner
        data = {
            'ai_moved': None,
            'forbidden': board.forbidden_locations,
            'message': message,
        }
        return jsonify(data)

    # Send the position where the AI will play.
    # Load the player matching the difficulty.
    # NOTE(review): hard_idx comes straight from the request and is used as a
    # list index without validation; an out-of-range value raises IndexError.
    hard_idx = receive_data['hard_idx']
    hards = [2500, 5000, 7500, 10000, 12500, 15000, 17500, 20000]
    model_file = f'./model/policy_9_{hards[hard_idx]}.model'

    # Context manager so the model file is closed after loading.
    with open(model_file, 'rb') as model_fh:
        policy_param = pickle.load(model_fh, encoding='bytes')
    best_policy = PolicyValueNetNumpy(9, 9, policy_param)
    mcts_player = MCTSPlayer(best_policy.policy_value_fn,
                             c_puct=5, n_playout=400)

    ai_move = mcts_player.get_action(board)
    ai_loc = board.move_to_location(ai_move)
    board.do_move(ai_move)
    board.set_forbidden()  # update the forbidden positions

    print(np.array(board.states_loc))

    # Win check (did the AI win?)
    message = None
    end, winner = board.game_end()
    if end:
        message = "tie" if winner == -1 else winner

    data = {
        'ai_moved': list(map(int, ai_loc)),
        'states_loc': board.states_loc,
        'forbidden': board.forbidden_locations,
        'message': message,
    }
    return jsonify(data)
def run_game(surface, omok, menu):
    """Run the main loop of a rule-based black player against an MCTS AI.

    Stones are stored in ``omok.board`` as 0 (empty), 1 (black, placed via
    ``omok.check_board_black``) and 2 (white, placed via
    ``omok.check_board_white``).  Each loop iteration does one of:

    * ``omok.id == 0``: place the opening black stone at a random position
      with both coordinates in 3..4.
    * ``omok.turn == 1 and omok.id > 1``: scan the board for runs of white
      stones to block ("defense") and runs of black stones to extend
      ("attack"), first runs of four (including four-cell windows with a
      single gap), then runs of three.
    * neighbour heuristic: choose an empty interior cell by counting adjacent
      black stones, with a random fallback.
    * ``omok.turn == 2``: load the policy network and let the MCTS player
      choose the white move.

    Every black stone is also forwarded to ``game`` through
    ``Human(x, y)`` / ``game.start_play``.  The function returns when
    ``omok.is_gameover`` is truthy.

    ``surface`` and ``menu`` are not referenced in this body.  ``board_size``,
    ``fps_clock`` and ``fps`` are not defined here and must come from the
    enclosing module.

    NOTE(review): the source this was reconstructed from had lost its
    indentation.  In particular, whether the neighbour heuristic sits at
    loop level (as laid out here, paired with the ``omok.turn == 2`` branch)
    or inside the pattern-scan branch could not be determined -- confirm
    against the original file.
    """
    omok.init_game()
    # board indices run from 0 to 7
    omok.id = 0
    board = Board(width=8, height=8, n_in_row=5)
    game = Game(board)
    model_file = 'best_policy_8_8_5.model'
    while True:
        # Opening black stone.
        if (omok.id == 0):
            a = random.randint(3, 4)
            b = random.randint(3, 4)
            print('first stone', a, b)
            omok.board[a][b] = 1
            omok.check_board_black(a, b)
            human = Human(a, b)
            game.start_play(human)
        elif (omok.turn == 1 and omok.id > 1):
            # Scan order: 4 defense, 4 attack, 3 defense, 3 attack.
            # Every condition re-checks omok.turn == 1.
            # NOTE(review): presumably check_board_black changes omok.turn so
            # that at most one stone is placed per pass -- confirm.

            # ---- 4 defense ----
            # Diagonal, x and y both increasing.
            for x in range(0, 4):
                for y in range(0, 4):
                    if (omok.board[x][y] == 2 and omok.board[x + 1][y + 1] == 2
                            and omok.board[x + 2][y + 2] == 2
                            and omok.board[x + 3][y + 3] == 2
                            and omok.turn == 1):
                        # Four in a row: block the far end, else the near end.
                        if (omok.board[x + 4][y + 4] == 0):
                            omok.board[x + 4][y + 4] = 1
                            omok.check_board_black(x + 4, y + 4)
                            human = Human(x + 4, y + 4)
                            game.start_play(human)
                        elif (x > 0 and y > 0
                              and omok.board[x - 1][y - 1] == 0):
                            omok.board[x - 1][y - 1] = 1
                            omok.check_board_black(x - 1, y - 1)
                            human = Human(x - 1, y - 1)
                            game.start_play(human)
                    # Gap at the second cell of the four-cell window.
                    elif (omok.board[x][y] == 2
                          and omok.board[x + 1][y + 1] == 0
                          and omok.board[x + 2][y + 2] == 2
                          and omok.board[x + 3][y + 3] == 2
                          and omok.turn == 1):
                        omok.board[x + 1][y + 1] = 1
                        omok.check_board_black(x + 1, y + 1)
                        human = Human(x + 1, y + 1)
                        game.start_play(human)
                    # Gap at the third cell of the four-cell window.
                    elif (omok.board[x][y] == 2
                          and omok.board[x + 1][y + 1] == 2
                          and omok.board[x + 2][y + 2] == 0
                          and omok.board[x + 3][y + 3] == 2
                          and omok.turn == 1):
                        omok.board[x + 2][y + 2] = 1
                        omok.check_board_black(x + 2, y + 2)
                        human = Human(x + 2, y + 2)
                        game.start_play(human)

            # Anti-diagonal, x decreasing while y increases.
            for x in range(4, 7):
                for y in range(0, 4):
                    if (omok.board[x][y] == 2 and omok.board[x - 1][y + 1] == 2
                            and omok.board[x - 2][y + 2] == 2
                            and omok.board[x - 3][y + 3] == 2
                            and omok.turn == 1):
                        if (y > 0 and omok.board[x + 1][y - 1] == 0):
                            omok.board[x + 1][y - 1] = 1
                            omok.check_board_black(x + 1, y - 1)
                            human = Human(x + 1, y - 1)
                            game.start_play(human)
                        elif (omok.board[x - 4][y + 4] == 0):
                            omok.board[x - 4][y + 4] = 1
                            omok.check_board_black(x - 4, y + 4)
                            human = Human(x - 4, y + 4)
                            game.start_play(human)
                    elif (omok.board[x][y] == 2
                          and omok.board[x - 1][y + 1] == 0
                          and omok.board[x - 2][y + 2] == 2
                          and omok.board[x - 3][y + 3] == 2
                          and omok.turn == 1):
                        omok.board[x - 1][y + 1] = 1
                        omok.check_board_black(x - 1, y + 1)
                        human = Human(x - 1, y + 1)
                        game.start_play(human)
                    elif (omok.board[x][y] == 2
                          and omok.board[x - 1][y + 1] == 2
                          and omok.board[x - 2][y + 2] == 0
                          and omok.board[x - 3][y + 3] == 2
                          and omok.turn == 1):
                        omok.board[x - 2][y + 2] = 1
                        omok.check_board_black(x - 2, y + 2)
                        human = Human(x - 2, y + 2)
                        game.start_play(human)

            # Along x (y fixed).
            for x in range(0, 4):
                for y in range(0, 8):
                    if (omok.board[x][y] == 2 and omok.board[x + 1][y] == 2
                            and omok.board[x + 2][y] == 2
                            and omok.board[x + 3][y] == 2
                            and omok.turn == 1):
                        if (omok.board[x + 4][y] == 0):
                            omok.board[x + 4][y] = 1
                            omok.check_board_black(x + 4, y)
                            human = Human(x + 4, y)
                            game.start_play(human)
                        elif (x > 0 and omok.board[x - 1][y] == 0):
                            omok.board[x - 1][y] = 1
                            omok.check_board_black(x - 1, y)
                            human = Human(x - 1, y)
                            game.start_play(human)
                    elif (omok.board[x][y] == 2 and omok.board[x + 1][y] == 0
                          and omok.board[x + 2][y] == 2
                          and omok.board[x + 3][y] == 2
                          and omok.turn == 1):
                        omok.board[x + 1][y] = 1
                        omok.check_board_black(x + 1, y)
                        human = Human(x + 1, y)
                        game.start_play(human)
                    elif (omok.board[x][y] == 2 and omok.board[x + 1][y] == 2
                          and omok.board[x + 2][y] == 0
                          and omok.board[x + 3][y] == 2
                          and omok.turn == 1):
                        omok.board[x + 2][y] = 1
                        omok.check_board_black(x + 2, y)
                        human = Human(x + 2, y)
                        game.start_play(human)

            # Along y (x fixed).
            for x in range(0, 8):
                for y in range(0, 4):
                    if (omok.board[x][y] == 2 and omok.board[x][y + 1] == 2
                            and omok.board[x][y + 2] == 2
                            and omok.board[x][y + 3] == 2
                            and omok.turn == 1):
                        if (omok.board[x][y + 4] == 0):
                            omok.board[x][y + 4] = 1
                            omok.check_board_black(x, y + 4)
                            human = Human(x, y + 4)
                            game.start_play(human)
                        elif (y > 0 and omok.board[x][y - 1] == 0):
                            omok.board[x][y - 1] = 1
                            omok.check_board_black(x, y - 1)
                            human = Human(x, y - 1)
                            game.start_play(human)
                    elif (omok.board[x][y] == 2 and omok.board[x][y + 1] == 0
                          and omok.board[x][y + 2] == 2
                          and omok.board[x][y + 3] == 2
                          and omok.turn == 1):
                        omok.board[x][y + 1] = 1
                        omok.check_board_black(x, y + 1)
                        human = Human(x, y + 1)
                        game.start_play(human)
                    elif (omok.board[x][y] == 2 and omok.board[x][y + 1] == 2
                          and omok.board[x][y + 2] == 0
                          and omok.board[x][y + 3] == 2
                          and omok.turn == 1):
                        omok.board[x][y + 2] = 1
                        omok.check_board_black(x, y + 2)
                        human = Human(x, y + 2)
                        game.start_play(human)

            # ---- 4 attack ----
            # Same scans for black stones: place the black stone next to
            # where black stones already gather.
            # Diagonal, x and y both increasing.
            for x in range(0, 4):
                for y in range(0, 4):
                    if (omok.board[x][y] == 1 and omok.board[x + 1][y + 1] == 1
                            and omok.board[x + 2][y + 2] == 1
                            and omok.board[x + 3][y + 3] == 1
                            and omok.turn == 1):
                        if (omok.board[x + 4][y + 4] == 0):
                            omok.board[x + 4][y + 4] = 1
                            omok.check_board_black(x + 4, y + 4)
                            human = Human(x + 4, y + 4)
                            game.start_play(human)
                        elif (x > 0 and y > 0
                              and omok.board[x - 1][y - 1] == 0):
                            omok.board[x - 1][y - 1] = 1
                            omok.check_board_black(x - 1, y - 1)
                            human = Human(x - 1, y - 1)
                            game.start_play(human)
                    elif (omok.board[x][y] == 1
                          and omok.board[x + 1][y + 1] == 0
                          and omok.board[x + 2][y + 2] == 1
                          and omok.board[x + 3][y + 3] == 1
                          and omok.turn == 1):
                        omok.board[x + 1][y + 1] = 1
                        omok.check_board_black(x + 1, y + 1)
                        human = Human(x + 1, y + 1)
                        game.start_play(human)
                    elif (omok.board[x][y] == 1
                          and omok.board[x + 1][y + 1] == 1
                          and omok.board[x + 2][y + 2] == 0
                          and omok.board[x + 3][y + 3] == 1
                          and omok.turn == 1):
                        omok.board[x + 2][y + 2] = 1
                        omok.check_board_black(x + 2, y + 2)
                        human = Human(x + 2, y + 2)
                        game.start_play(human)

            # Anti-diagonal, x decreasing while y increases.
            for x in range(4, 7):
                for y in range(0, 4):
                    if (omok.board[x][y] == 1 and omok.board[x - 1][y + 1] == 1
                            and omok.board[x - 2][y + 2] == 1
                            and omok.board[x - 3][y + 3] == 1
                            and omok.turn == 1):
                        if (x < 7 and y > 0
                                and omok.board[x + 1][y - 1] == 0):
                            omok.board[x + 1][y - 1] = 1
                            omok.check_board_black(x + 1, y - 1)
                            human = Human(x + 1, y - 1)
                            game.start_play(human)
                        elif (omok.board[x - 4][y + 4] == 0):
                            omok.board[x - 4][y + 4] = 1
                            omok.check_board_black(x - 4, y + 4)
                            human = Human(x - 4, y + 4)
                            game.start_play(human)
                    elif (omok.board[x][y] == 1
                          and omok.board[x - 1][y + 1] == 0
                          and omok.board[x - 2][y + 2] == 1
                          and omok.board[x - 3][y + 3] == 1
                          and omok.turn == 1):
                        omok.board[x - 1][y + 1] = 1
                        omok.check_board_black(x - 1, y + 1)
                        human = Human(x - 1, y + 1)
                        game.start_play(human)
                    elif (omok.board[x][y] == 1
                          and omok.board[x - 1][y + 1] == 1
                          and omok.board[x - 2][y + 2] == 0
                          and omok.board[x - 3][y + 3] == 1
                          and omok.turn == 1):
                        omok.board[x - 2][y + 2] = 1
                        omok.check_board_black(x - 2, y + 2)
                        human = Human(x - 2, y + 2)
                        game.start_play(human)

            # Along x (y fixed).
            for x in range(0, 4):
                for y in range(0, 8):
                    if (omok.board[x][y] == 1 and omok.board[x + 1][y] == 1
                            and omok.board[x + 2][y] == 1
                            and omok.board[x + 3][y] == 1
                            and omok.turn == 1):
                        if (omok.board[x + 4][y] == 0):
                            omok.board[x + 4][y] = 1
                            omok.check_board_black(x + 4, y)
                            human = Human(x + 4, y)
                            game.start_play(human)
                        elif (x > 0 and omok.board[x - 1][y] == 0):
                            omok.board[x - 1][y] = 1
                            omok.check_board_black(x - 1, y)
                            human = Human(x - 1, y)
                            game.start_play(human)
                    elif (omok.board[x][y] == 1 and omok.board[x + 1][y] == 0
                          and omok.board[x + 2][y] == 1
                          and omok.board[x + 3][y] == 1
                          and omok.turn == 1):
                        omok.board[x + 1][y] = 1
                        omok.check_board_black(x + 1, y)
                        human = Human(x + 1, y)
                        game.start_play(human)
                    elif (omok.board[x][y] == 1 and omok.board[x + 1][y] == 1
                          and omok.board[x + 2][y] == 0
                          and omok.board[x + 3][y] == 1
                          and omok.turn == 1):
                        omok.board[x + 2][y] = 1
                        omok.check_board_black(x + 2, y)
                        human = Human(x + 2, y)
                        game.start_play(human)

            # Along y (x fixed).
            for x in range(0, 8):
                for y in range(0, 4):
                    if (omok.board[x][y] == 1 and omok.board[x][y + 1] == 1
                            and omok.board[x][y + 2] == 1
                            and omok.board[x][y + 3] == 1
                            and omok.turn == 1):
                        if (omok.board[x][y + 4] == 0):
                            omok.board[x][y + 4] = 1
                            omok.check_board_black(x, y + 4)
                            human = Human(x, y + 4)
                            game.start_play(human)
                        elif (y > 0 and omok.board[x][y - 1] == 0):
                            omok.board[x][y - 1] = 1
                            omok.check_board_black(x, y - 1)
                            human = Human(x, y - 1)
                            game.start_play(human)
                    elif (omok.board[x][y] == 1 and omok.board[x][y + 1] == 0
                          and omok.board[x][y + 2] == 1
                          and omok.board[x][y + 3] == 1
                          and omok.turn == 1):
                        omok.board[x][y + 1] = 1
                        omok.check_board_black(x, y + 1)
                        human = Human(x, y + 1)
                        game.start_play(human)
                    elif (omok.board[x][y] == 1 and omok.board[x][y + 1] == 1
                          and omok.board[x][y + 2] == 0
                          and omok.board[x][y + 3] == 1
                          and omok.turn == 1):
                        omok.board[x][y + 2] = 1
                        omok.check_board_black(x, y + 2)
                        human = Human(x, y + 2)
                        game.start_play(human)

            # ---- 3 defense ----
            # Diagonal, x and y both increasing.
            for x in range(0, 5):
                for y in range(0, 5):
                    if (omok.board[x][y] == 2 and omok.board[x + 1][y + 1] == 2
                            and omok.board[x + 2][y + 2] == 2
                            and omok.turn == 1):
                        if (omok.board[x + 3][y + 3] == 0):
                            omok.board[x + 3][y + 3] = 1
                            omok.check_board_black(x + 3, y + 3)
                            human = Human(x + 3, y + 3)
                            game.start_play(human)
                        elif (y > 0 and x > 0
                              and omok.board[x - 1][y - 1] == 0):
                            omok.board[x - 1][y - 1] = 1
                            omok.check_board_black(x - 1, y - 1)
                            human = Human(x - 1, y - 1)
                            game.start_play(human)

            # Anti-diagonal, x decreasing while y increases.
            for x in range(3, 7):
                for y in range(0, 5):
                    if (omok.board[x][y] == 2 and omok.board[x - 1][y + 1] == 2
                            and omok.board[x - 2][y + 2] == 2
                            and omok.turn == 1):
                        if (y > 0 and omok.board[x + 1][y - 1] == 0):
                            omok.board[x + 1][y - 1] = 1
                            omok.check_board_black(x + 1, y - 1)
                            human = Human(x + 1, y - 1)
                            game.start_play(human)
                        elif (omok.board[x - 3][y + 3] == 0):
                            omok.board[x - 3][y + 3] = 1
                            omok.check_board_black(x - 3, y + 3)
                            human = Human(x - 3, y + 3)
                            game.start_play(human)

            # Along x (y fixed).
            for x in range(0, 5):
                for y in range(0, 8):
                    if (omok.board[x][y] == 2 and omok.board[x + 1][y] == 2
                            and omok.board[x + 2][y] == 2
                            and omok.turn == 1):
                        if (omok.board[x + 3][y] == 0):
                            omok.board[x + 3][y] = 1
                            omok.check_board_black(x + 3, y)
                            human = Human(x + 3, y)
                            game.start_play(human)
                        elif (x > 0 and omok.board[x - 1][y] == 0):
                            omok.board[x - 1][y] = 1
                            omok.check_board_black(x - 1, y)
                            human = Human(x - 1, y)
                            game.start_play(human)

            # Along y (x fixed).
            for x in range(0, 8):
                for y in range(0, 5):
                    if (omok.board[x][y] == 2 and omok.board[x][y + 1] == 2
                            and omok.board[x][y + 2] == 2
                            and omok.turn == 1):
                        if (omok.board[x][y + 3] == 0):
                            omok.board[x][y + 3] = 1
                            omok.check_board_black(x, y + 3)
                            human = Human(x, y + 3)
                            game.start_play(human)
                        elif (y > 0 and omok.board[x][y - 1] == 0):
                            omok.board[x][y - 1] = 1
                            omok.check_board_black(x, y - 1)
                            human = Human(x, y - 1)
                            game.start_play(human)

            # ---- 3 attack ----
            # Diagonal, x and y both increasing.
            for x in range(0, 5):
                for y in range(0, 5):
                    if (omok.board[x][y] == 1 and omok.board[x + 1][y + 1] == 1
                            and omok.board[x + 2][y + 2] == 1
                            and omok.turn == 1):
                        if (omok.board[x + 3][y + 3] == 0):
                            omok.board[x + 3][y + 3] = 1
                            omok.check_board_black(x + 3, y + 3)
                            human = Human(x + 3, y + 3)
                            game.start_play(human)
                        elif (y > 0 and x > 0
                              and omok.board[x - 1][y - 1] == 0):
                            omok.board[x - 1][y - 1] = 1
                            omok.check_board_black(x - 1, y - 1)
                            human = Human(x - 1, y - 1)
                            game.start_play(human)

            # Anti-diagonal, x decreasing while y increases.
            for x in range(3, 7):
                for y in range(0, 5):
                    if (omok.board[x][y] == 1 and omok.board[x - 1][y + 1] == 1
                            and omok.board[x - 2][y + 2] == 1
                            and omok.turn == 1):
                        if (x < 7 and y > 0
                                and omok.board[x + 1][y - 1] == 0):
                            omok.board[x + 1][y - 1] = 1
                            omok.check_board_black(x + 1, y - 1)
                            human = Human(x + 1, y - 1)
                            game.start_play(human)
                        elif (omok.board[x - 3][y + 3] == 0):
                            omok.board[x - 3][y + 3] = 1
                            omok.check_board_black(x - 3, y + 3)
                            human = Human(x - 3, y + 3)
                            game.start_play(human)

            # Along x (y fixed).
            for x in range(0, 5):
                for y in range(0, 8):
                    if (omok.board[x][y] == 1 and omok.board[x + 1][y] == 1
                            and omok.board[x + 2][y] == 1
                            and omok.turn == 1):
                        if (omok.board[x + 3][y] == 0):
                            omok.board[x + 3][y] = 1
                            omok.check_board_black(x + 3, y)
                            human = Human(x + 3, y)
                            game.start_play(human)
                        elif (x > 0 and omok.board[x - 1][y] == 0):
                            omok.board[x - 1][y] = 1
                            omok.check_board_black(x - 1, y)
                            human = Human(x - 1, y)
                            game.start_play(human)

            # Along y (x fixed).
            for x in range(0, 8):
                for y in range(0, 5):
                    if (omok.board[x][y] == 1 and omok.board[x][y + 1] == 1
                            and omok.board[x][y + 2] == 1
                            and omok.turn == 1):
                        if (omok.board[x][y + 3] == 0):
                            omok.board[x][y + 3] = 1
                            omok.check_board_black(x, y + 3)
                            human = Human(x, y + 3)
                            game.start_play(human)
                        elif (y > 0 and omok.board[x][y - 1] == 0):
                            omok.board[x][y - 1] = 1
                            omok.check_board_black(x, y - 1)
                            human = Human(x, y - 1)
                            game.start_play(human)

            # Anti-diagonal 3 attack again: this repeats the anti-diagonal
            # scan above verbatim.
            for x in range(3, 7):
                for y in range(0, 5):
                    if (omok.board[x][y] == 1 and omok.board[x - 1][y + 1] == 1
                            and omok.board[x - 2][y + 2] == 1
                            and omok.turn == 1):
                        if (x < 7 and y > 0
                                and omok.board[x + 1][y - 1] == 0):
                            omok.board[x + 1][y - 1] = 1
                            omok.check_board_black(x + 1, y - 1)
                            human = Human(x + 1, y - 1)
                            game.start_play(human)
                        elif (omok.board[x - 3][y + 3] == 0):
                            omok.board[x - 3][y + 3] = 1
                            omok.check_board_black(x - 3, y + 3)
                            human = Human(x - 3, y + 3)
                            game.start_play(human)

            # Disabled "empty triangle" defense code, kept as a bare string
            # literal (it is never executed).
            """for x in range(2, 4): for y in range(3, 5): if (omok.board[x][y] == 1 and omok.board[x + 1][y] == 1 and omok.board[x][ y - 1] == 0 and omok.turn == 1): if (y > 0 and omok.board[x][y - 1] == 0): omok.board[x][y - 1] = 1 omok.check_board_black(x, y - 1) human = Human(x, y - 1) game.start_play(human)"""

        # Neighbour heuristic for black.
        if (omok.turn != 2 and omok.id != 0):
            movelist = list()
            countlist = list()
            # Only interior cells 1..6, so all eight neighbours exist.
            for a in range(1, 7):
                for b in range(1, 7):
                    count = 0
                    if (omok.board[a][b] != 0):
                        # Occupied cell: its weight stays 0.
                        pass
                    else:
                        # Count black stones among the eight neighbours.
                        if (omok.board[a - 1][b - 1] == 1):
                            count = count + 1
                        if (omok.board[a][b - 1] == 1):
                            count = count + 1
                        if (omok.board[a + 1][b - 1] == 1):
                            count = count + 1
                        if (omok.board[a - 1][b] == 1):
                            count = count + 1
                        if (omok.board[a + 1][b] == 1):
                            count = count + 1
                        if (omok.board[a - 1][b + 1] == 1):
                            count = count + 1
                        if (omok.board[a][b + 1] == 1):
                            count = count + 1
                        if (omok.board[a + 1][b + 1] == 1):
                            count = count + 1
                    # Record the coordinate as "a,b" and its weight at the
                    # same index in the two parallel lists.
                    xymove = str(a) + ',' + str(b)
                    movelist.append(xymove)
                    countlist.append(count)
            # Index of the first coordinate carrying the largest weight.
            move = countlist.index(max(countlist))
            # Split the stored "a,b" string back into its two parts.
            splitposition = movelist[move].split(',')
            if (move == 0):
                # The first entry won (for example when all weights are
                # equal): pick a position at a random offset instead.
                indexx = 0
                indexy = 0
                for c in range(2, 5):
                    for d in range(2, 5):
                        if (omok.board[c][d] == 2):
                            # Last white stone found in the central 3x3 area.
                            indexx = c
                            indexy = d
                movepm = [1, 2, -1, -2, 0]
                var1 = movepm[random.randint(0, 4)]
                if (var1 == 0):
                    # x offset is 0, so draw a non-zero y offset.
                    var2 = movepm[random.randint(0, 3)]
                else:
                    var2 = movepm[random.randint(0, 4)]
                indexx = indexx + var1
                indexy = indexy + var2
                splitposition[0] = str(indexx)
                splitposition[1] = str(indexy)
            positionx = int(splitposition[0])
            positiony = int(splitposition[1])
            try:
                if (positionx > 0 and positiony > 0
                        and omok.board[positionx][positiony] == 0):
                    omok.board[positionx][positiony] = 1
                    omok.check_board_black(positionx, positiony)
                    human = Human(positionx, positiony)
                    game.start_play(human)
            except:
                # Any error (e.g. an off-board index) is silently ignored; no
                # stone is placed and the loop runs again.
                pass
        elif (omok.turn == 2):
            # White's turn: the model is reloaded and a new MCTS player is
            # built on every white move.
            try:
                policy_param = pickle.load(open(model_file, 'rb'))
            except:
                # Fallback for reading the pickle under Python 3.
                policy_param = pickle.load(open(model_file, 'rb'),
                                           encoding='bytes')
            best_policy = PolicyValueNetNumpy(8, 8, policy_param)
            print('current model is', model_file)
            mcts = MCTSPlayer(best_policy.policy_value_fn, c_puct=7,
                              n_playout=2000)
            move = game.start_play1(mcts)
            # Convert the flat move index into board coordinates.
            # NOTE(review): board_size is not defined in this function --
            # presumably a module-level constant equal to 8; confirm.
            x = move // board_size
            y = move % board_size
            omok.board[x][y] = 2
            omok.check_board_white(x, y)
        if omok.is_gameover:
            return
        pygame.display.update()
        fps_clock.tick(fps)
result.init_board(start_player=0) if input1 != ' ': for i in range(len(input1)): result.states[input1[i]] = 1 result.availables.remove(input1[i]) if input2 != ' ': for j in range(len(input2)): result.states[input2[j]] = 2 result.availables.remove(input2[j]) result.current_player = 1 return result parsed_input1, parsed_input2, ai = parse(input_from_app) width = height = length_list[ai] board = makemap(parsed_input1, parsed_input2) model_file = model_file_list[ai] try: policy_param = pickle.load(open(model_file, 'rb')) except: policy_param = pickle.load(open(model_file, 'rb'), encoding='bytes') # To support python3 best_policy = PolicyValueNetNumpy(width, height, policy_param) mcts_player = MCTSPlayer( best_policy.policy_value_fn, c_puct=5, n_playout=400) # set larger n_playout for better performance print(mcts_player.get_action(board)) sys.stdout.flush()