def run(self):
     log_dir = "./logs/bastille/MOISMCTSWithRandomRollouts_Vs_TabularQLearning/" + str(
         time())
     print(str(log_dir))
     print(
         TensorboardWindJammersRunner(MOISMCTSWithRandomRolloutsAgent(
             100, SafeWindJammersRunner(RandomAgent(), RandomAgent())),
                                      TabularQLearningAgent(),
                                      checkpoint=100,
                                      log_dir=log_dir).run(1000000))
 def run(self):
     log_dir = "./logs/bastille/Random_Vs_RandomRollout_100/" + str(time())
     print(str(log_dir))
     print(
         TensorboardWindJammersRunner(RandomAgent(),
                                      RandomRolloutAgent(
                                          100,
                                          SafeWindJammersRunner(
                                              RandomAgent(),
                                              RandomAgent())),
                                      checkpoint=100,
                                      log_dir=log_dir).run(1000000))
 def run(self):
     log_dir = "./logs/bastille/TabularQLearning_MOISMCTSWithRandomRolloutsExpertThenApprentice/" + str(
         time())
     print(str(log_dir))
     print(
         TensorboardWindJammersRunner(
             TabularQLearningAgent(),
             MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent(
                 100, SafeWindJammersRunner(RandomAgent(), RandomAgent()),
                 8, 12),
             checkpoint=100,
             log_dir=log_dir).run(1000000))
 def run(self):
     log_dir = "./logs/bastille/TabularQLearning_MOISMCTSWithValueNetwork/" + str(
         time())
     print(str(log_dir))
     print(
         TensorboardWindJammersRunner(TabularQLearningAgent(),
                                      MOISMCTSWithValueNetworkAgent(
                                          100,
                                          SafeWindJammersRunner(
                                              RandomAgent(),
                                              RandomAgent())),
                                      checkpoint=100,
                                      log_dir=log_dir).run(1000000))
Example #5
def main():
    # Set log level.
    logging.basicConfig(level=logging.INFO)

    # Set a random seed for consistency in agent AND environment.
    if config.RANDOM_SEED is not None:
        np.random.seed(config.RANDOM_SEED)

    # Make environment.
    env = EnvCatcher(grid_size=config.GRID_SIZE,
                     env_type='episodic',
                     verbose=False,
                     random_seed=config.RANDOM_SEED)

    # Make agent.
    agent = RandomAgent(actions=list(range(env.action_space)))

    # Run the RL Loop.
    episode_returns = rl.run_loop(agent=agent,
                                  env=env,
                                  max_num_steps=config.MAX_NUM_STEPS,
                                  report_every_n=config.REPORT_EVERY_N)

    # Save the data.
    date_string = time.strftime("%Y%m%d-%H%M%S")
    filename = ('random_grid_{}_nep_{}_'.format(
        config.GRID_SIZE, len(episode_returns)) + date_string + '.csv')
    rl.save_episode_returns(filename=filename, episode_returns=episode_returns)
Example #6
def run_training_batch(n_games):
    random.seed(time.time())  # seed from the current time
    agent = Agent()
    agent_types = [agent, CustomAgent(), RandomAgent()]
    #   agent_weights = [1.0/3.0, 1.0/3.0, 1]
    #   ^ Weights needed for even distribution of learning agent and random agent
    #   agent_weights = [1.0/3.0, 1, 1]
    #   ^ Weights needed for even distribution of learning agent and custom agent
    agent_weights = [1.0 / 9.0, 4.0 / 9.0, 1]
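    #   Note: the weights are cumulative thresholds in [0, 1], not independent probabilities;
    #   the roll below picks the first agent whose threshold is >= the rolled value.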

    start = time.perf_counter()

    for i in range(n_games):
        try:
            list_of_agents = []
            list_of_agents.append(agent)
            for _ in range(3):  # "_" avoids shadowing the outer game counter "i"
                rolled_weight = random.random()
                for index in range(len(agent_types)):
                    if rolled_weight <= agent_weights[index]:
                        list_of_agents.append(agent_types[index])
                        break
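            # Shuffle seating so the learning agent does not always occupy the same position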
            random.shuffle(list_of_agents)
            active_game = Game(i, list_of_agents)
            active_game.play_game()
        except Exception as err:
            print(f"Error encountered... {err}")
    end = time.perf_counter()
    print(f"Ran {n_games} in {(end-start)} seconds.")
    return True
 def run(self):
     log_dir = "./logs/bastille/Random_Vs_ReinforceClassic/" + str(time())
     print(str(log_dir))
     print(
         TensorboardWindJammersRunner(RandomAgent(),
                                      ReinforceClassicAgent(8, 12),
                                      checkpoint=100,
                                      log_dir=log_dir).run(1000000))
Example #8
 def test_on_round_finish(self):
     initial_player_points = []
     for player in self.temp_players:
         player.set_controlling_agent(RandomAgent())
         self.temp_deck.deal_cards_to(player)
         initial_player_points.append(player.get_round_points())
     self.test_round.begin_play()
     self.assertEqual(len(self.temp_deck.get_card_list()), 0)
 def run(self):
     log_dir = "./logs/bastille/Random_Vs_DoubleQLearning/" + str(time())
     print(str(log_dir))
     print(
         TensorboardWindJammersRunner(RandomAgent(),
                                      DoubleQLearningAgent(),
                                      checkpoint=100,
                                      log_dir=log_dir).run(1000000))
Example #10
def test_full_replay_cycle():
    from environment.battlesnake_environment import BattlesnakeEnvironment
    from agents.RandomAgent import RandomAgent
    from environment.Battlesnake.importer.Importer import Importer
    from environment.Battlesnake.renderer.game_renderer import GameRenderer

    agents = [RandomAgent(), RandomAgent()]

    env = BattlesnakeEnvironment(
        width=15,
        height=15,
        agents=agents,
        act_timeout=0.1,
        export_games=True
    )

    env.reset()

    while not env.game.is_game_over():
        env.step()

    assert os.path.exists(env.exporter.outpath)
    assert os.path.getsize(env.exporter.outpath) > 0
    game, turns, move_list = Importer.load_replay_file(env.exporter.outpath)

    width, height = move_list[0].width, move_list[0].height
    num_snakes = len(move_list[0].snakes)

    renderer = GameRenderer(width, height, num_snakes)

    assert len(move_list) >= 1
    assert len(move_list) == turns
    assert game

    for move in move_list:
        renderer.display(move)
    
    os.unlink(env.exporter.outpath)

    # TODO: could be moved to a shared place so that this cleanup always runs at the end
    import shutil
    shutil.rmtree("replays", ignore_errors=True)
    shutil.rmtree("replay_test", ignore_errors=True)
 def run(self):
     log_dir = "./logs/bastille/Random_Vs_PPOWithMultipleTrajectoriesMultiOutputs/" + str(
         time())
     print(str(log_dir))
     print(
         TensorboardWindJammersRunner(
             RandomAgent(),
             PPOWithMultipleTrajectoriesMultiOutputsAgent(8, 12),
             checkpoint=100,
             log_dir=log_dir).run(1000000))
Example #12
  def setUp(self):
    # Set a random seed for consistency in agent AND environment.
    if RANDOM_SEED is not None:
        np.random.seed(RANDOM_SEED)

    # Make environment.
    self.env = EnvCatcher(grid_size=GRID_SIZE,
                          env_type='episodic',
                          verbose=False,
                          random_seed=RANDOM_SEED)

    # Make agent.
    self.agent = RandomAgent(actions=list(range(self.env.action_space)))
Example #13
def sample_env():
    from agents.RandomAgent import RandomAgent
    from agents.SimpleAgent_solution import SimpleAgent
    from environment.battlesnake_environment import BattlesnakeEnvironment
    
    agents = [RandomAgent(), SimpleAgent()]
    env = BattlesnakeEnvironment(
        width=15,
        height=15,
        agents=agents,
        act_timeout=0.2
    )
    env.reset()
    return env
Example #14
 def test_file_out_behavior(self):
     for player in self.temp_players:
         player.set_controlling_agent(RandomAgent())
     self.temp_game.play_game()
     with open(self.test_round.get_file_out_name(), 'rb') as infile:
         file_data = pickle.load(infile)
         round_data = self.test_round._get_file_out_data()
         for i in range(len(file_data)):
             # Compare each saved array element-wise against the in-memory round data.
             self.assertTrue((round_data[i]["trick_history"] == file_data[i]["trick_history"]).all())
             self.assertGreater(round_data[i]["trick_history"].sum(), 0)
             self.assertTrue((round_data[i]["trick_point_history"] == file_data[i]["trick_point_history"]).all())
             self.assertGreaterEqual(round_data[i]["trick_point_history"].sum(), 0)
             self.assertTrue((round_data[i]["player_partners"] == file_data[i]["player_partners"]).all())
             self.assertGreater(round_data[i]["player_partners"].sum(), 0)
             self.assertTrue((round_data[i]["call_matrix"] == file_data[i]["call_matrix"]).all())
             self.assertGreater(round_data[i]["call_matrix"].sum(), 0)
             self.assertTrue((round_data[i]["player_cards_in_hand_history"] == file_data[i]["player_cards_in_hand_history"]).all())
             self.assertGreater(round_data[i]["player_cards_in_hand_history"].sum(), 0)
             self.assertTrue((round_data[i]["player_point_history"] == file_data[i]["player_point_history"]).all())
             self.assertGreater(round_data[i]["player_point_history"].sum(), 0)
             self.assertTrue((round_data[i]["player_partner_prediction_history"] == file_data[i]["player_partner_prediction_history"]).all())
             for e in range(len(file_data[i]["player_score_history"])):
                 self.assertEqual(round_data[i]["player_score_history"][e],
                                  file_data[i]["player_score_history"][e])
Example #15
def main():
    """Running parameters"""
    atari_game: str = "Qbert-v4"
    # atari_game: str = "Pong-v4"
    number_of_episodes: int = 20000  # Number of episodes to run
    episode_number: int = 1
    env = gym.make(atari_game)

    # Create agents
    random_agent: RandomAgent = RandomAgent(env)
    q_table_agent: QTableAgent = QTableAgent(env)

    while episode_number <= number_of_episodes:
        random_agent.play_an_episode()
        q_table_agent.play_an_episode()
        print()

        if episode_number % 100 == 0:
            q_table_agent.compare_performance_to(random_agent, 100)
            print()

        episode_number += 1

    print(q_table_agent)
Example #16
 def run(self):
     if self.opponent == "RandomAgent":
         log_dir1 = self.log_dir_root + "DoubleQLearningAgent_VS_RandomAgent_" + self.time
         print(log_dir1)
         print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                          RandomAgent(),
                                          log_and_reset_score_history_threshold=10000,
                                          log_dir=log_dir1).run(100000000))
     elif self.opponent == "TabularQLearningAgent":
         log_dir2 = self.log_dir_root + "DoubleQLearningAgent_VS_TabularQLearningAgent_" + self.time
         print(log_dir2)
         print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                          TabularQLearningAgent(),
                                          log_and_reset_score_history_threshold=10000,
                                          log_dir=log_dir2).run(100000000))
     elif self.opponent == "DeepQLearningAgent":
         log_dir3 = self.log_dir_root + "DoubleQLearningAgent_VS_DeepQLearningAgent_" + self.time
         print(log_dir3)
         print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                          DeepQLearningAgent(9, 9),
                                          log_and_reset_score_history_threshold=10000,
                                          log_dir=log_dir3).run(100000000))
     elif self.opponent == "ReinforceClassicAgent":
         log_dir4 = self.log_dir_root + "DoubleQLearningAgent_VS_ReinforceClassicAgent_" + self.time
         print(log_dir4)
         print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                          ReinforceClassicAgent(9, 9),
                                          log_and_reset_score_history_threshold=10000,
                                          log_dir=log_dir4).run(100000000))
     elif self.opponent == "ReinforceClassicWithMultipleTrajectoriesAgent":
         log_dir5 = self.log_dir_root + "DoubleQLearningAgent_VS_ReinforceClassicWithMultipleTrajectoriesAgent_" + self.time
         print(log_dir5)
         print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                          ReinforceClassicWithMultipleTrajectoriesAgent(9, 9),
                                          log_and_reset_score_history_threshold=10000,
                                          log_dir=log_dir5).run(100000000))
     elif self.opponent == "PPOWithMultipleTrajectoriesMultiOutputsAgent":
         log_dir6 = self.log_dir_root + "DoubleQLearningAgent_VS_PPOWithMultipleTrajectoriesMultiOutputsAgent_" + self.time
         print(log_dir6)
         print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                          PPOWithMultipleTrajectoriesMultiOutputsAgent(9, 9),
                                          log_and_reset_score_history_threshold=10000,
                                          log_dir=log_dir6).run(100000000))
     elif self.opponent == "MOISMCTSWithRandomRolloutsAgent":
         log_dir7 = self.log_dir_root + "DoubleQLearningAgent_VS_MOISMCTSWithRandomRolloutsAgent_" + self.time
         print(log_dir7)
         print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                          MOISMCTSWithRandomRolloutsAgent(100,
                                                                          SafeTicTacToeRunner(RandomAgent(),
                                                                                              RandomAgent())),
                                          log_and_reset_score_history_threshold=10000,
                                          log_dir=log_dir7).run(1000000000))
     elif self.opponent == "MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent":
         log_dir8 = self.log_dir_root + "DoubleQLearningAgent_VS_MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent_" + self.time
         print(log_dir8)
         print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                          MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent(100,
                                                                                              SafeTicTacToeRunner(
                                                                                                  RandomAgent(),
                                                                                                  RandomAgent()),9,9),
                                          log_and_reset_score_history_threshold=10000,
                                          log_dir=log_dir8).run(1000000000))
     elif self.opponent == "MOISMCTSWithValueNetworkAgent":
         log_dir9 = self.log_dir_root + "DoubleQLearningAgent_VS_MOISMCTSWithValueNetworkAgent_" + self.time
         print(log_dir9)
         print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                          MOISMCTSWithValueNetworkAgent(100,
                                                                        SafeTicTacToeRunner(RandomAgent(),
                                                                                            RandomAgent())),
                                          log_and_reset_score_history_threshold=10000,
                                          log_dir=log_dir9).run(1000000000))
     elif self.opponent == "DoubleQLearningAgent":
         log_dir10 = self.log_dir_root + "DoubleQLearningAgent_VS_DoubleQLearningAgent_" + self.time
         print(log_dir10)
         print(TensorboardTicTacToeRunner(DoubleQLearningAgent(),
                                          DoubleQLearningAgent(),
                                          log_and_reset_score_history_threshold=10000,
                                          log_dir=log_dir10).run(1000000000))
     elif self.opponent == "RandomRolloutAgent":
         nb_rollouts = 3
         log_dir11 = self.log_dir_root + "RandomAgent_VS_RandomRolloutAgent(" + str(nb_rollouts) + ")_" + self.time
         print(log_dir11)
         print(TensorboardTicTacToeRunner(RandomAgent(),
                                          RandomRolloutAgent(nb_rollouts,
                                              SafeTicTacToeRunner(
                                                  RandomAgent(),
                                                  RandomAgent())),
                                          log_and_reset_score_history_threshold=10000,
                                          log_dir=log_dir11).run(1000000000))
     else:
         print("Unknown opponent")
Example #17
def run(agent_id, is_training, load_dir):
    env = gym.make('LunarLanderContinuous-v2')

    time_format = strftime("%Y%m%d%H%M%S", gmtime())
    outdir = './results/' + time_format + '-' + agent_id + '/'

    if not os.path.exists(outdir + 'video/'):
        os.makedirs(outdir + 'video/')

    #Enable below comment to record video
    #env = wrappers.Monitor(env, directory=outdir + 'video/', force=True)
    env.seed(123)
    np.random.seed(123)

    #set up logger
    logging.basicConfig(filename=outdir + 'application.log',
                        level=logging.INFO)
    logger = logging.getLogger('gym')

    agent = None
    if (agent_id == 'RandomAgent'):
        agent = RandomAgent(env)
    elif (agent_id == 'DDPG'):
        agent = DDPG(env)
        agent.build_models()
    elif (agent_id == 'RBFAgent'):
        agent = RBFAgent(env)
    else:
        logger.error("Invalid Agent chosen!")
        return

    if load_dir is not None:
        logger.info("Load model at " + load_dir)
        #agent.load_weight(load_dir + '/')
        agent.load_model(load_dir + '/')

    # Initialize counters as plain ints (np.int16 would overflow before max_steps is reached)
    episode = 1
    step = 0
    max_steps = 1000000
    max_episodes = 1100 + episode

    if not is_training:
        logger.info("it is now testing")
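        # Learning phase 0 puts Keras layers such as dropout and batch norm into inference mode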
        K.set_learning_phase(0)
        max_episodes = 5 + episode
    else:
        K.set_learning_phase(1)

    file_output = outdir + 'reward.csv'
    labels = ['episode', 'reward']
    with open(file_output, 'w', newline='') as csvfile:  # newline='' avoids blank rows from the csv writer on Windows
        writer = csv.writer(csvfile)
        writer.writerow(labels)

        observation = None
        episode_reward = None

        try:
            while step < max_steps and episode < max_episodes:

                if observation is None:  # start of a new episode
                    logger.info("Start episode " + str(episode))
                    episode_reward = np.float32(0)

                    # Obtain the initial observation by resetting the environment.
                    agent.reset_episode()
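                    # deepcopy so later in-place changes by the environment do not alias the stored observation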
                    observation = deepcopy(env.reset())

                action = agent.act(observation)

                reward = np.float32(0)
                done = False

                next_state, r, done, info = env.step(action)
                next_state = deepcopy(next_state)
                reward = r

                if is_training:
                    agent.step(observation, action, reward, next_state, done)

                episode_reward += reward
                step += 1

                observation = next_state

                if episode % 20 == 0:
                    logger.info(
                        "State-Action: {}, {}, {}, {}, reward: {}".format(
                            observation[0], observation[1], action[0],
                            action[1], reward))

                if done:
                    # Act on the final state and step without adding it to memory,
                    # since the next state would be the reset state of a new episode
                    if is_training:
                        action = agent.act(next_state)
                        agent.step_without_memory()

                    to_write = [episode] + [episode_reward]
                    writer.writerow(to_write)
                    if episode % 20 == 0:
                        csvfile.flush()
                    if episode % 20 == 0:
                        agent.save_weight(outdir)

                    episode += 1
                    observation = None
                    episode_reward = None

        except KeyboardInterrupt:
            csvfile.flush()
            agent.save_weight(outdir)

        # Close the env
        env.close()
Example #18
 def test_play_with_learning_agent(self):
     self.temp_player_list[0].set_controlling_agent(Agent())
     for index in range(1, 4):
         self.temp_player_list[index].set_controlling_agent(RandomAgent())
     self.temp_game.play_game()
     self.assertGreaterEqual(max(self.temp_game.get_score_list()), 42)
Example #19
 def test_play_game(self):
     for player in self.temp_player_list:
         player.set_controlling_agent(RandomAgent())
     self.temp_game.play_game()
     self.assertGreaterEqual(max(self.temp_game.get_score_list()), 42)
Example #20
 def test_update_scores(self):
     for player in self.temp_player_list:
         player.set_controlling_agent(RandomAgent())
     self.temp_game.begin_round()
     self.assertNotEqual(sum(self.temp_game.get_score_list()), 0)
Example #21
    #                                  MOISMCTSWithRandomRolloutsAgent(100,
    #                                                                  SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
    #                                  log_and_reset_score_history_threshold=10000,
    #                                  log_dir=log_dir).run(1000000000))
    #
    # log_dir = "./logs/Random_vs_all/RandomAgent_VS_MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent_" + str(time())
    # print(log_dir)
    # print(TensorboardTicTacToeRunner(RandomAgent(),
    #                                  MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent(100, SafeTicTacToeRunner(
    #                                      RandomAgent(), RandomAgent()),9,9),
    #                                  log_and_reset_score_history_threshold=10000,
    #                                  log_dir=log_dir).run(1000000000))
    #
    log_dir = "./logs/Random_vs_all/RandomAgent_VS_MOISMCTSWithValueNetworkAgent_" + str(time())
    print(log_dir)
    print(TensorboardTicTacToeRunner(RandomAgent(),
                                     MOISMCTSWithValueNetworkAgent(100,"1",
                                                                   SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
                                     log_and_reset_score_history_threshold=10000,
                                     log_dir=log_dir).run(1000000000))

    # AGENTS EXAMPLES :
    # CommandLineAgent()
    # RandomAgent()
    # RandomRolloutAgent(3, SafeTicTacToeRunner(RandomAgent(), RandomAgent()))
    # TabularQLearningAgent()
    # DeepQLearningAgent(9,9)
    # ReinforceClassicAgent(9,9)
    # ReinforceClassicWithMultipleTrajectoriesAgent(9,9)
    # PPOWithMultipleTrajectoriesMultiOutputsAgent(9,9)
    # MOISMCTSWithRandomRolloutsAgent(100, SafeTicTacToeRunner(RandomAgent(), RandomAgent()))
Example #22
            scores = self.generate_midgame_env(num_agent_init_card)["scores"]
            print(scores)
            cnt_win[np.argmin(scores)] += 1
        print(cnt_win)

    def test_66(self, num_round=100):
        cnt_win = [
            0,
        ] * len(self.agent_list)
        for i in range(num_round):
            scores = [
                0,
            ] * len(self.agent_list)
            while max(scores) < 66:
                score = self.generate_midgame_env(
                    num_agent_init_card)["scores"]
                scores = [
                    scores[j] + score[j] for j in range(len(self.agent_list))
                ]
            cnt_win[np.argmin(scores)] += 1
            print("Round %d :" % i, scores, cnt_win)
        print(cnt_win)


if __name__ == "__main__":
    gameEmulator = GameEmulator()
    # Your own Agent (the one that calls execute_action) defaults to index 0 and should be added first
    gameEmulator.add_agent(KeyboardAgent())
    gameEmulator.add_agent(RandomAgent())
    gameEmulator.emulate(1)
                if self.print_and_reset_score_history_threshold is not None and \
                        round_id % self.print_and_reset_score_history_threshold == 0:
                    print(score_history /
                          self.print_and_reset_score_history_threshold)
                    if self.prev_history is not None and \
                            score_history[0] == self.prev_history[0] and \
                            score_history[1] == self.prev_history[1] and \
                            score_history[2] == self.prev_history[2]:
                        self.stuck_on_same_score += 1
                    else:
                        self.prev_history = score_history
                        self.stuck_on_same_score = 0
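                    # When the score history is unchanged for enough consecutive checkpoints,
                    # player 1 is swapped for a CommandLineAgent so a human can inspect the stuck match-up.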
                    if (self.replace_player1_with_commandline_after_similar_results is not None and
                            self.stuck_on_same_score >= self.replace_player1_with_commandline_after_similar_results):
                        self.agents = (CommandLineAgent(), self.agents[1])
                        self.stuck_on_same_score = 0
        return tuple(score_history)


if __name__ == "__main__":
    print("MOISMCTSWithRandomRolloutsAgent VS RandomAgent")
    print(
        BasicTicTacToeRunner(
            MOISMCTSWithRandomRolloutsAgent(
                100, SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
            RandomAgent(),
            print_and_reset_score_history_threshold=1000).run(1000))
Example #24
def main(ppo=False, dqn=False, ddqn=False, pg=False, random=False):
    """
    main

    Parameters
    ----------
    ppo : bool, optional
        Set to True to run the PPO agent. The default is False.
    dqn : bool, optional
        Set to True to run the DQN agent. The default is False.
    ddqn : bool, optional
        Set to True to run the DDQN agent. The default is False.
    pg : bool, optional
        Set to True to run the PG agent. The default is False.
    random : bool, optional
        Set to True to run the random agent. The default is False.

    Returns
    -------
    None.

    """

    # Running DQN Agent
    if dqn:
        env = gym.make('PongDeterministic-v4')
        action_size = env.action_space.n
        agent = DqnAgent(action_size)
        nn = AtariModel()
        nn.build_atari_model(action_size)
        model = nn.model
        memory = RingBuffer(dqn_settings.MEMORY_SIZE)
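        # Replay buffer: stores (state, action, next_state, reward, finished) transitions for experience replay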

        for i in range(dqn_settings.NUMBER_EPISODES):
            frame = env.reset()
            frame = agent.image_preprocessing(frame)
            state = (frame, frame, frame, frame)
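            # The state is a stack of the four most recent frames, so the network can infer
            # motion that a single frame cannot convey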
            env.render()
            finished = False
            summed_reward = 0

            while not finished:
                action = agent.choose_action(state, model)
                next_frame, reward, finished, _ = env.step(action)
                next_frame = agent.image_preprocessing(next_frame)
                reward = agent.transform_reward(reward)
                next_state = (next_frame, state[0], state[1], state[2])
                summed_reward += reward
                memory.append((state, action, next_state, reward, finished))
                state = next_state
                env.render()

                if (i > dqn_settings.ITERATIONS_BEFORE_FIT):
                    minibatch = memory.sample_random_batch(
                        dqn_settings.BATCH_SIZE)
                    agent.fit_batch(model, minibatch, action_size)

            if (agent.epsilon > dqn_settings.FINAL_EPSILON):
                agent.epsilon = agent.epsilon * dqn_settings.GAMMA
            print("Iteration:", i, " Reward:", summed_reward, "Epsilon:",
                  agent.epsilon)
            with open("rewards.txt", "a") as f:
                f.write(str(summed_reward) + "\n")
            if (i % 100 == 0):
                model.save('models/saved_model')

    # Running PPO Agent
    elif ppo:
        policy = PpoAgent()
        run_ppo(policy)

    # Running DDQN Agent
    elif ddqn:
        ddqn_agent = DdqnAgent()
        ddqn_agent.run_ddqn()

    # Running PG Agent
    elif pg:
        pg_agent = PgAgent()
        pg_agent.run_pg()

    # Running Random Agent
    elif random:
        random_agent = RandomAgent()
        random_agent.run_random()

    # If no agent is selected a message will print asking to select one
    else:
        print(
            'No agent selected to run! Please select an agent: dqn, ppo, ddqn, pg, random'
        )
Example #25
from environment.battlesnake_environment import BattlesnakeEnvironment
from agents.RandomAgent import RandomAgent
from agents.HumanPlayer import HumanPlayer
import pygame

nb_random_player = 2
num_fruits = 1

agents = [HumanPlayer()]

for i in range(nb_random_player):
    agents.append(RandomAgent())

env = BattlesnakeEnvironment(
    width=15,
    height=15,
    num_fruits=num_fruits,
    agents=agents,
)

env.reset()
env.render()

while True:

    env.handle_input()
    env.step()
    env.render()

    pygame.time.wait(250)
Example #26
                        self.prev_history = score_history
                        self.stuck_on_same_score = 0
                    if (self.replace_player1_with_commandline_after_similar_results is not None and
                            self.stuck_on_same_score >= self.replace_player1_with_commandline_after_similar_results):
                        self.agents = (CommandLineAgent(), self.agents[1])
                        self.stuck_on_same_score = 0
                    score_history = np.array((0, 0, 0))
                    self.execution_time = np.array((0.0, 0.0))
        return tuple(score_history)


if __name__ == "__main__":

    number = [1000, 10000, 100000, 1000000]
    versus_name = ['RandomAgent', 'Tabular', 'DQN', 'DDQN', 'Reinforce', 'Reinforce A2C Style', 'PPO', 'MCTS']
    versus_agent = [RandomAgent(),
                    TabularQLearningAgent(),
                    DeepQLearningAgent(9, 9),
                    DoubleDeepQLearningAgent(9, 9),
                    ReinforceClassicAgent(9, 9),
                    ReinforceClassicWithMultipleTrajectoriesAgent(9, 9),
                    PPOWithMultipleTrajectoriesMultiOutputsAgent(9, 9),
                    MOISMCTSWithValueNetworkAgent(9, 9, 2)]
    versus = [versus_name, versus_agent]

    for num in number:
        for i in range(len(versus_name)):
            with open("D:/DEEP_LEARNING/Reinforcement/TabularVS" + str(versus[0][i]) + "_NB_" + str(num) + ".csv", 'w+') as f:  # Here, change "TabularVS" to the name of the agent you are running against all the others
                print("New Fight " + str(versus[0][i]) + " " + str(num))
                begin = time()
Example #27
    #                                  TabularQLearningAgent(),
    #                                  log_and_reset_score_history_threshold=1000,
    #                                  log_dir=log_dir).run(1000000000))

    # log_dir = "./logs/TabularQLearning_VS_TabularQLearning_" + str(time())
    # print(log_dir)
    # print(TensorboardPower4Runner(TabularQLearningAgent(),
    #                                  TabularQLearningAgent(),
    #                                  log_and_reset_score_history_threshold=1000,
    #                                  log_dir=log_dir).run(1000000000))

    log_dir = "./logs/RandomRollout_VS_TabularQLearning_" + str(time())
    print(log_dir)
    print(
        TensorboardPower4Runner(RandomRolloutAgent(
            3, SafePower4Runner(RandomAgent(), RandomAgent())),
                                TabularQLearningAgent(),
                                log_and_reset_score_history_threshold=1000,
                                log_dir=log_dir).run(1000000000))

    # AGENTS EXAMPLES :
    # CommandLineAgent()
    # RandomAgent()
    # RandomRolloutAgent(3, SafeTicTacToeRunner(RandomAgent(), RandomAgent()))
    # TabularQLearningAgent()
    # DeepQLearningAgent(9,9)
    # ReinforceClassicAgent(9,9)
    # ReinforceClassicWithMultipleTrajectoriesAgent(9,9)
    # PPOWithMultipleTrajectoriesMultiOutputsAgent(9,9)
    # MOISMCTSWithRandomRolloutsAgent(100, SafeTicTacToeRunner(RandomAgent(), RandomAgent()))
    # MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent(100, SafeTicTacToeRunner(RandomAgent(), RandomAgent()),9,9)
Example #28
                round_step += 1

            self.writer.add_summary(
                tf.Summary(value=[
                    tf.Summary.Value(
                        tag="agent1_action_mean_duration",
                        simple_value=self.mean_action_duration_sum[0] /
                        round_step),
                    tf.Summary.Value(
                        tag="agent2_action_mean_duration",
                        simple_value=self.mean_action_duration_sum[1] /
                        round_step),
                    tf.Summary.Value(
                        tag="agent1_accumulated_reward",
                        simple_value=self.mean_accumulated_reward_sum[0]),
                    tf.Summary.Value(
                        tag="agent2_accumulated_reward",
                        simple_value=self.mean_accumulated_reward_sum[1])
                ], ), episode_id)

            episode_id += 1


if __name__ == "__main__":
    print("Rdm vs Rdm")
    print(
        TensorboardInstrumentedWindJammersRunner(
            TabularQLearningAgent(),
            RandomAgent(),
            log_dir_root="./logs/Rdm_Vs_Rdm").run(1000))
def run():

    log_dir = "./logs/bastilleMP/ReinforceWithMultipleTraj_Vs_TabularQLearning/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(
            ReinforceClassicWithMultipleTrajectoriesAgent(8, 12),
            TabularQLearningAgent(),
            checkpoint=100,
            log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/DeepQLearning_Vs_TabularQLearning/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(DeepQLearningAgent(8, 12),
                                     TabularQLearningAgent(),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/MOISMCTSWithRandomRollouts_Vs_TabularQLearning/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(MOISMCTSWithRandomRolloutsAgent(
            100, SafeWindJammersRunner(RandomAgent(), RandomAgent())),
                                     TabularQLearningAgent(),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/ReinforceClassicWithMultipleTrajectories_Vs_TabularQLearningAgent/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(
            ReinforceClassicWithMultipleTrajectoriesAgent(8, 12),
            TabularQLearningAgent(),
            checkpoint=100,
            log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/Random_Vs_TabularQLearningAgent/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(RandomAgent(),
                                     TabularQLearningAgent(),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/Random_Vs_RandomRollout_100/" + str(time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(RandomAgent(),
                                     RandomRolloutAgent(
                                         100,
                                         SafeWindJammersRunner(
                                             RandomAgent(), RandomAgent())),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/Random_Vs_DeepQLearning/" + str(time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(RandomAgent(),
                                     DeepQLearningAgent(8, 12),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/Random_Vs_DoubleQLearning/" + str(time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(RandomAgent(),
                                     DoubleQLearningAgent(),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/Random_Vs_ReinforceClassic/" + str(time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(RandomAgent(),
                                     ReinforceClassicAgent(8, 12),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/Random_Vs_ReinforceClassicWithMultipleTrajectories/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(
            RandomAgent(),
            ReinforceClassicWithMultipleTrajectoriesAgent(8, 12),
            checkpoint=100,
            log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/Random_Vs_PPOWithMultipleTrajectoriesMultiOutputs/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(
            RandomAgent(),
            PPOWithMultipleTrajectoriesMultiOutputsAgent(8, 12),
            checkpoint=100,
            log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/Random_Vs_MOISMCTSWithRandomRollouts/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(RandomAgent(),
                                     MOISMCTSWithRandomRolloutsAgent(
                                         100,
                                         SafeWindJammersRunner(
                                             RandomAgent(), RandomAgent())),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/Random_Vs_MOISMCTSWithRandomRolloutsExpertThenApprentice/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(
            RandomAgent(),
            MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent(
                100, SafeWindJammersRunner(RandomAgent(), RandomAgent()), 8,
                12),
            checkpoint=100,
            log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/Random_Vs_MOISMCTSWithValueNetwork/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(RandomAgent(),
                                     MOISMCTSWithValueNetworkAgent(
                                         100,
                                         SafeWindJammersRunner(
                                             RandomAgent(), RandomAgent())),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/TabularQLearning_RandomRollout_100/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(TabularQLearningAgent(),
                                     RandomRolloutAgent(
                                         100,
                                         SafeWindJammersRunner(
                                             RandomAgent(), RandomAgent())),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/TabularQLearning_DeepQLearning/" + str(time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(TabularQLearningAgent(),
                                     DeepQLearningAgent(8, 12),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/TabularQLearning_DoubleQLearning/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(TabularQLearningAgent(),
                                     DoubleQLearningAgent(),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/TabularQLearning_ReinforceClassic/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(TabularQLearningAgent(),
                                     ReinforceClassicAgent(8, 12),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/TabularQLearning_ReinforceClassicWithMultipleTrajectories/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(
            TabularQLearningAgent(),
            ReinforceClassicWithMultipleTrajectoriesAgent(8, 12),
            checkpoint=100,
            log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/TabularQLearning_PPOWithMultipleTrajectoriesMultiOutputs/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(
            TabularQLearningAgent(),
            PPOWithMultipleTrajectoriesMultiOutputsAgent(8, 12),
            checkpoint=100,
            log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/TabularQLearning_MOISMCTSWithRandomRollouts/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(TabularQLearningAgent(),
                                     MOISMCTSWithRandomRolloutsAgent(
                                         100,
                                         SafeWindJammersRunner(
                                             RandomAgent(), RandomAgent())),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/TabularQLearning_MOISMCTSWithRandomRolloutsExpertThenApprentice/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(
            TabularQLearningAgent(),
            MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent(
                100, SafeWindJammersRunner(RandomAgent(), RandomAgent()), 8,
                12),
            checkpoint=100,
            log_dir=log_dir).run(100000))

    log_dir = "./logs/bastilleMP/TabularQLearning_MOISMCTSWithValueNetwork/" + str(
        time())
    print(str(log_dir))
    print(
        TensorboardWindJammersRunner(TabularQLearningAgent(),
                                     MOISMCTSWithValueNetworkAgent(
                                         100,
                                         SafeWindJammersRunner(
                                             RandomAgent(), RandomAgent())),
                                     checkpoint=100,
                                     log_dir=log_dir).run(100000))
Example #30
                input("Which call do you want to make?(enter an index)"))
        return call_to_play_index

    def play_card(self, a_player, a_game):
        valid_play_list = a_player.get_valid_play_list()
        cards_in_hand_list = [
            card.get_card_id() for card in a_player.get_cards_in_hand()
        ]
        print(f"Your cards in hand are: {cards_in_hand_list}")
        playable_cards_list = [
            cards_in_hand_list[index] for index in range(len(valid_play_list))
            if valid_play_list[index] != 0
        ]
        print(f"Your playable cards are: {playable_cards_list}")
        current_trick = a_game.get_trick()
        cards_on_trick = [
            str(card) for card in current_trick.get_played_cards_list()
        ]
        print(f"The cards on trick are: {cards_on_trick}")
        card_to_play = -1
        while card_to_play not in playable_cards_list:
            card_to_play = int(input("Which card do you want to play?"))
        for index in range(len(cards_in_hand_list)):
            if card_to_play == cards_in_hand_list[index]:
                return index


if __name__ == "__main__":
    agent_types = [HumanAgent(), Agent(), CustomAgent(), RandomAgent()]
    active_game = Game(0, agent_types)
    active_game.play_game()