def main(in1, in2):
    in1 = str(in1)
    in2 = str(in2)
    p1_dict = {
        "human": players.HumanPlayer("Team Jimmy", 1),
        "random": players.RandomPlayer(1),
        "mini_easy": players.MinimaxPlayer(1, 3),
        "mini_medium": players.MinimaxPlayer(1, 5),
        "mini_hard": players.MinimaxPlayer(1, 6),
        "net_random": players.NetPlayer(1, "Random"),
        "net_easy": players.NetPlayer(1, "Easy"),
        "net_medium": players.NetPlayer(1, "Medium"),
        "net_hard": players.NetPlayer(1, "Hard"),
    }

    # Build player 2's display name with a combining strikethrough over "second place".
    result = ""
    h2_name = "second place"
    for c in h2_name:
        result = result + c + '\u0336'

    p2_dict = {
        "human": players.HumanPlayer("Team " + result + " Ben", 2),
        "random": players.RandomPlayer(2),
        "mini_easy": players.MinimaxPlayer(2, 3),
        "mini_medium": players.MinimaxPlayer(2, 5),
        "mini_hard": players.MinimaxPlayer(2, 6),
        "net_random": players.NetPlayer(2, "Random"),
        "net_easy": players.NetPlayer(2, "Easy"),
        "net_medium": players.NetPlayer(2, "Medium"),
        "net_hard": players.NetPlayer(2, "Hard"),
    }

    player1 = p1_dict[in1]
    player2 = p2_dict[in2]
    game_board = GameBoard([player1, player2])
    game_board.game_loop()
    # there has to be a better way to do this
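# The closing comment above flags the duplicated player tables. A hedged
# alternative sketch, assuming only the constructor signatures used in main():
# build either table on demand from a single factory keyed by player number.
# make_player is a hypothetical helper, not part of the original module.
def make_player(kind, num, name):
    factories = {
        "human": lambda: players.HumanPlayer(name, num),
        "random": lambda: players.RandomPlayer(num),
        "mini_easy": lambda: players.MinimaxPlayer(num, 3),
        "mini_medium": lambda: players.MinimaxPlayer(num, 5),
        "mini_hard": lambda: players.MinimaxPlayer(num, 6),
        "net_random": lambda: players.NetPlayer(num, "Random"),
        "net_easy": lambda: players.NetPlayer(num, "Easy"),
        "net_medium": lambda: players.NetPlayer(num, "Medium"),
        "net_hard": lambda: players.NetPlayer(num, "Hard"),
    }
    return factories[kind]()

# Usage sketch:
#   player1 = make_player(in1, 1, "Team Jimmy")
#   player2 = make_player(in2, 2, "Team " + result + " Ben")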
def main():
    try:
        args, constants = init_variables()
        sock = utils.connect_to_server(args.host, args.port, args.name, constants)
        if args.smart:
            ply = players.SmartPlayer(args.name, sock)
        elif args.random:
            ply = players.RandomPlayer(args.name, sock)
        else:
            ply = players.SmartPlayer(args.name, sock)
        ply.play()
    except KeyboardInterrupt:
        if constants.connected:
            sock.shutdown(socket.SHUT_RDWR)
            sock.close()
def main():
    '''Main method.'''
    RANDOM_PLAYER = players.RandomPlayer()
    SEQUENTIAL_PLAYER = players.SequentialPlayer()
    MOSTCOMMON_PLAYER = players.MostCommonPlayer()
    HISTORIC_PLAYER = players.HistoricPlayer(3)

    print("Welcome to the Rock, Paper & Scissors game!")
    print(
        "Please type in valid players: 'random', 'sequential', 'mostcommon' or 'historic'."
    )
    player1 = input("Who is player 1? ")
    player2 = input("Who is player 2? ")

    def get_player(player):
        '''Return the chosen player, or None if the name is not recognised.'''
        if player == "random":
            return RANDOM_PLAYER
        if player == "sequential":
            return SEQUENTIAL_PLAYER
        if player == "mostcommon":
            return MOSTCOMMON_PLAYER
        if player == "historic":
            return HISTORIC_PLAYER
        print("You did not type a valid player class.")
        return None

    first_player = get_player(player1)
    second_player = get_player(player2)
    MULTIPLE_GAMES = MultipleGames(first_player, second_player, 100)
    MULTIPLE_GAMES.arrange_tournament()
def train_defence():
    # Set up the game board.
    kp = kakerlakenpoker.Kakerlakenpoker()
    p1_rndact = players.RandomPlayer(kp, PLAYER1)
    p2_rndact = players.RandomPlayer(kp, PLAYER2)

    # Dimensions of the observation and action spaces.
    obs_size = 40
    n_actions = 2

    # Number of training episodes.
    n_episodes = 3000

    # Counters.
    win = 0
    miss = 0

    # Set up the Q-function and the optimizer.
    q_func = qf.QFunction(obs_size, n_actions)
    if USE_GPU:
        q_func.to_gpu(0)
    optimizer = chainer.optimizers.Adam(eps=1e-2)
    optimizer.setup(q_func)

    # Reward discount factor.
    gamma = 0.95

    # Epsilon-greedy exploration; epsilon decays linearly to end_epsilon over 50000 steps.
    p1_explorer = chainerrl.explorers.LinearDecayEpsilonGreedy(
        start_epsilon=1.0, end_epsilon=0.3, decay_steps=50000,
        random_action_func=p1_rndact.random_defence_action_func)

    # Replay buffer used by DQN for experience replay.
    replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity=10**6)

    agent_p1 = chainerrl.agents.DoubleDQN(
        q_func, optimizer, replay_buffer, gamma, p1_explorer,
        replay_start_size=500, target_update_interval=100)

    t1 = time.time()
    for i in range(1, n_episodes + 1):
        kp.reset()
        reward = 0
        reward_avg = 0
        turn = 0
        while not kp.done:
            off_act = p2_rndact.random_offence_action_func()
            off_act_vec = np.zeros(8, dtype=np.float32)
            off_act_vec[off_act % 8] = 1
            env = np.append(kp.get_env().copy(), off_act_vec)
            def_act = agent_p1.act_and_train(env.copy(), reward)
            reward += kp.step_and_reward(off_act, def_act, PLAYER2)
            kp.check_winner()
            if kp.done:
                if kp.winner == 1:
                    reward += 100
                    win += 1
                elif kp.winner == -1:
                    reward += -100
                else:
                    reward += -100
                if kp.miss:
                    miss += 1
                agent_p1.stop_episode_and_train(env.copy(), reward, True)
            else:
                # print("***Turn", turn, "***")
                # print(kp.show())
                last_state = kp.get_env().copy()
            turn += 1
        reward_avg += reward
        if i % N_INFO == 0:
            print("***Episodes", i, "***")
            print("win:", win)
            print("miss", miss)
            print("reward avg:", reward_avg / N_INFO)
            print("rnd:", p1_rndact.random_count)
            win = 0
            reward_avg = 0
            miss = 0
            p1_rndact.random_count = 0
            t2 = time.time()
            print("time:" + str(t2 - t1))
            t1 = time.time()
    agent_p1.save("defence_model3000")
def main():
    kp = Kakerlakenpoker()
    kp.reset()
    human_player = players.HumanPlayer()
    p1_rndact = players.RandomPlayer(kp, PLAYER1)

    # Set up the offence Q-function and its optimizer.
    off_q_func = qf.QFunction(32, 64)
    # q_func.to_gpu(0)
    off_optimizer = chainer.optimizers.Adam(eps=1e-2)
    off_optimizer.setup(off_q_func)
    gamma = 0.95

    # Epsilon-greedy exploration; epsilon decays linearly to end_epsilon over 50000 steps.
    off_explorer = chainerrl.explorers.LinearDecayEpsilonGreedy(
        start_epsilon=1.0, end_epsilon=0.3, decay_steps=50000,
        random_action_func=p1_rndact.random_offence_action_func)

    # Replay buffer used by DQN for experience replay.
    off_replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity=10**6)

    urayama_offence = chainerrl.agents.DoubleDQN(
        off_q_func, off_optimizer, off_replay_buffer, gamma, off_explorer,
        replay_start_size=500, target_update_interval=100)

    # Set up the defence Q-function and its optimizer.
    def_q_func = qf.QFunction(40, 2)
    # q_func.to_gpu(0)
    def_optimizer = chainer.optimizers.Adam(eps=1e-2)
    def_optimizer.setup(def_q_func)

    def_explorer = chainerrl.explorers.LinearDecayEpsilonGreedy(
        start_epsilon=1.0, end_epsilon=0.3, decay_steps=50000,
        random_action_func=p1_rndact.random_defence_action_func)

    # Replay buffer used by DQN for experience replay.
    def_replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity=10**6)

    urayama_defence = chainerrl.agents.DoubleDQN(
        def_q_func, def_optimizer, def_replay_buffer, gamma, def_explorer,
        replay_start_size=500, target_update_interval=100)

    # Load the pre-trained agents (defence_model3000 is produced by train_defence).
    # chainerrl.agent.load_npz_no_strict("offence_model3000", urayama_offence)
    # chainerrl.agent.load_npz_no_strict("defence_model3000", urayama_defence)
    urayama_offence.load("offence_model3000")
    urayama_defence.load("defence_model3000")

    offence_act = [urayama_offence.act, human_player.offence_act]
    defence_act = [urayama_defence.act, human_player.defence_act]

    turn = PLAYER1  # PLAYER1 is URAYAMA, PLAYER2 is the human player
    turn_count = 1
    while not kp.done:
        print("***Turn", str(turn_count), "***")
        kp.show_vs_URAYAMA()
        off_act = offence_act[turn](kp.get_env().copy())
        off_act_vec = np.zeros(8, dtype=np.float32)
        off_act_vec[off_act % 8] = 1
        if turn == PLAYER1:
            print("URAYAMA declare:" + str(off_act % 8))
        else:
            print("Player declare:" + str(off_act % 8))
        def_act = defence_act[PLAYER2 - turn](np.append(
            kp.get_env().copy(), off_act_vec))
        ans = "True" if def_act == 1 else "Lie"
        if turn == PLAYER1:
            print("Player answer:" + ans)
        else:
            print("URAYAMA answer:" + ans)
        is_turn_change = kp.step(off_act, def_act, turn)
        kp.check_winner()
        if kp.done:
            if kp.winner == 1:
                print("URAYAMA win")
            elif kp.winner == -1:
                print("YOU win")
            else:
                print("Error")
            if kp.miss:
                print("MISS")
        if is_turn_change:
            turn = PLAYER1 if turn == PLAYER2 else PLAYER2  # swap turns
        turn_count += 1
rho = 0.2
initialEpsilon = 1.0
epsilonDecay = 0.99
seed1 = None
seed2 = None
printturns = False
trainIterations = 2000
randTestIterations = 200
aiTrainIterations = 2000
aiTestIterations = 200

p1 = players.AIPlayer(rho=rho, epsilon=initialEpsilon, seed=seed1)
p2 = players.RandomPlayer(seed=None)

print("Dots & Boxes AI Demo")
print("--------------------------------------------")

aiWins = 0
for i in range(trainIterations):
    g = game.Game()
    if i % 2 == 0:
        g.play(p1, p2, printturns=printturns)
    else:
        g.play(p2, p1, printturns=printturns)
    if g.score.index(max(g.score)) == p1.playernum:
        aiWins += 1
    p1.epsilon *= epsilonDecay

print("Train vs. Random:\t{} wins out of {}".format(aiWins, trainIterations))
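# Hedged sketch (not in the original snippet): evaluate the trained AIPlayer
# against the same RandomPlayer using the randTestIterations constant declared
# above, with exploration switched off. Assumes p1.epsilon controls the
# exploration rate, as the training loop's epsilon decay suggests.
savedEpsilon = p1.epsilon
p1.epsilon = 0.0  # greedy play only during evaluation
aiTestWins = 0
for i in range(randTestIterations):
    g = game.Game()
    if i % 2 == 0:
        g.play(p1, p2, printturns=printturns)
    else:
        g.play(p2, p1, printturns=printturns)
    if g.score.index(max(g.score)) == p1.playernum:
        aiTestWins += 1
p1.epsilon = savedEpsilon
print("Test vs. Random:\t{} wins out of {}".format(aiTestWins, randTestIterations))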
    # Fragment: tail of the simulator's reporting code.
    util.save_to_file(data_out, data)

    print("=== Statistics ===")
    print("{} ({}%) wins by Player 1 ({})".format(p1_wins, 100.0 * p1_wins / iters, self.p1.name))
    print("{} ({}%) wins by Player 2 ({})".format(p2_wins, 100.0 * p2_wins / iters, self.p2.name))
    print("{} ({}%) ties".format(ties, 100.0 * ties / iters))


if __name__ == "__main__":
    # Train the network on previously recorded game data.
    net = nets.Connect4Network()
    data = util.read_from_file("test.csv")
    X, y = util.split_features_labels(data)
    X, y = util.shuffle_data(X, y)
    X = np.expand_dims(X, axis=1)
    X = torch.from_numpy(X).float()
    y = torch.from_numpy(y).float()
    net.fit(X, y, batch_size=32)

    # player_1 = players.DeepMinimaxPlayer("Susan", net, 4)
    player_1 = players.RandomPlayer("Bimbo")
    # player_1 = players.MinimaxPlayer("Max", 4)
    # player_2 = players.MinimaxPlayer("Min", 6)
    player_2 = players.DeepMinimaxPlayer("Susan", net, 4)

    gs = ConnectFourSimulator(player_1, player_2)
    gs.run(100, verbose=False, data_out="test.csv")