Example #1
def main(_):
    params = {}
    if FLAGS.symbolic:
        params = {'seed': FLAGS.seed, 'level_name': FLAGS.level_name}
        env_generator = symbolic_alchemy.get_symbolic_alchemy_level
    else:
        env_settings = dm_alchemy.EnvironmentSettings(
            seed=FLAGS.seed, level_name=FLAGS.level_name)
        params = {'name': FLAGS.docker_image_name, 'settings': env_settings}
        env_generator = dm_alchemy.load_from_docker

    with env_generator(**params) as env:

        agent = RandomAgent(env.action_spec())

        timestep = env.reset()
        score = 0
        while not timestep.last():
            action = agent.act(timestep)
            timestep = env.step(action)

            if timestep.reward:
                score += timestep.reward
                print('Total score: {:.2f}, reward: {:.2f}'.format(
                    score, timestep.reward))
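
The RandomAgent driving Example #1 is not part of the snippet. As a rough, hypothetical sketch (assuming the action spec is a single dm_env DiscreteArray or BoundedArray, which is an assumption rather than something shown above), an agent matching the act(timestep) call could look like:

import numpy as np
from dm_env import specs


class RandomAgent:
    """Hypothetical sketch, not the original class: samples uniformly from a
    dm_env action spec."""

    def __init__(self, action_spec):
        self._spec = action_spec

    def act(self, timestep):
        del timestep  # a random agent ignores the observation
        if isinstance(self._spec, specs.DiscreteArray):
            return np.int32(np.random.randint(self._spec.num_values))
        # otherwise assume a BoundedArray-style spec
        return np.random.uniform(self._spec.minimum, self._spec.maximum,
                                 size=self._spec.shape).astype(self._spec.dtype)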
Example #2
    def move(self, game: Game, possible_steps=None):
        winning_step, loosing_steps, draw_steps, possible_steps = self.check_next_step(
            game, possible_steps)
        if winning_step is not None:
            return winning_step

        steps_to_exclude = []
        for step in possible_steps:
            game_clone = game.copy_and_move(step)
            winning_step2, loosing_steps2, draw_steps2, possible_steps2 = self.check_next_step(
                game_clone)
            if len(loosing_steps2) > 0:
                steps_to_exclude.append(step)
            draw_steps += draw_steps2

        if len(steps_to_exclude) > 0:
            possible_steps = [
                step2 for step2 in possible_steps
                if step2 not in steps_to_exclude
            ]

        if len(possible_steps) == 1:
            return possible_steps[0]
        elif len(possible_steps) > 1:
            return self.agent.move(game, possible_steps)
        else:
            agent = RandomAgent(self.label)
            return agent.move(game)
Example #3
def main():
    try:
        shutil.rmtree('images')
        print("delete images directory")
    except OSError as e:
        print("Error: %s : %s" % ('images', e.strerror))

    gym.logger.set_level(INFO)
    start_date = date(2019, 5, 1)
    simulate_company_list = [2, 3, 4, 5, 6, 44, 300, 67, 100, 200]
    # simulate_company_list = [3]
    env = gym.make("AsxGym-v0",
                   start_date=start_date,
                   simulate_company_list=simulate_company_list)
    stock_agent = RandomAgent(env)
    # stock_agent = RandomAgent(env, min_volume=100, max_volume=500)
    # stock_agent = BuyAndKeepAgent(env, 3)

    observation = env.reset()
    for _ in range(200000 * 24):
        env.render()
        company_count = len(env.simulate_company_list)

        observation, reward, done, info = env.step(stock_agent.action())
        if done:
            env.insert_summary_images(30)
            observation = env.reset()
            stock_agent.reset()
        if observation is not None:
            asx_observation = AsxObservation(observation)
            print(asx_observation.to_json_obj())
            print(info)

    env.close()
Example #4
def main(argv=None):
    '''Evaluate agent performances against RandomAgent and AIAgent'''

    logger = BriscolaLogger(BriscolaLogger.LoggerLevels.TEST)
    game = brisc.BriscolaGame(2, logger)

    # agent to be evaluated is RandomAgent or QAgent if a model is provided
    if FLAGS.model_dir:
        eval_agent = QAgent(network=FLAGS.network)
        eval_agent.load_model(FLAGS.model_dir)
        eval_agent.make_greedy()
    else:
        eval_agent = RandomAgent()

    # test agent against RandomAgent
    agents = [eval_agent, RandomAgent()]

    total_wins, points_history = evaluate(game, agents, FLAGS.num_evaluations)
    stats_plotter(agents, points_history, total_wins)

    # test agent against AIAgent
    agents = [eval_agent, AIAgent()]

    total_wins, points_history = evaluate(game, agents, FLAGS.num_evaluations)
    stats_plotter(agents, points_history, total_wins)
Example #5
def run_baseline_comparison_v5(n_games=2500):

    experiment_name = 'baseline_comparison_v5'

    agent10 = RandomAgent(distribution='uniform')
    agent11 = RandomAgent(distribution='uniform_on_types')
    agent12 = RandomAgent(distribution='first_buy')

    multi_arena = ArenaMultiThread()
    list_of_agents = [agent10, agent11, agent12]

    results = multi_arena.all_vs_all(list_of_agents, n_games)

    if main_thread:
        print(' \n \n {}'.format(results.to_pandas()))
        print('\n \n \n')
        print(results)
        wins = results.to_pandas(param='wins').to_csv('wins.csv')
        vic_points = results.to_pandas(
            param='victory_points').to_csv('victory_points.csv')
        rewards = results.to_pandas(param='reward').to_csv('reward.csv')

        #leader_board = LeaderBoard(list_of_agents)
        #leader_board.load_from_file()
        #leader_board.register_from_games_statistics(results)
        #print(leader_board)
        #leader_board.save_to_file()

        plt.title('Average win rate over {} games per pair:'.format(2 *
                                                                    n_games))
        wins_pic = results.create_heatmap(param='wins', average=True, p2=2)
        plt.savefig('reports/wins.png')
        plt.clf()

        plt.title('Average reward over {} games per pair:'.format(2 * n_games))
        reward_pic = results.create_heatmap('reward', average=True, p2=2)
        plt.savefig('reports/reward.png')
        plt.clf()

        plt.title('Average victory points over {} games per pair:'.format(
            2 * n_games))
        vic_points_pic = results.create_heatmap('victory_points',
                                                average=True,
                                                p2=2)
        plt.savefig('reports/victory_points.png')
        plt.clf()

        plt.title('Average games played over {} games per pair:'.format(
            2 * n_games))
        vic_points_pic = results.create_heatmap('games',
                                                average=True,
                                                n_games=n_games,
                                                p2=2)
        plt.savefig('reports/games.png')
        plt.clf()
Example #6
def run_experiment():
    trainer = MCTS_value_trainer()
    trainer.include_params_file(PARAMS_FILE)
    trainer.include_params_file(
        'gym_splendor_code/envs/mechanics/game_settings.py')

    if not CLUSTER:
        trainer.run_training_games_multi_process(
            opponent_to_train='self',
            baselines=[RandomAgent(distribution='first_buy'),
                       RandomAgent()],
            epochs=50,
            mcts_passes=15,
            n_test_games=0,
            exploration_ceofficient=0.41,
            experiment_name='MCTS local',
            weights_path=
            '/home/tomasz/ML_Research/splendor/gym-splendor/archive/weights_tt1/',
            neural_network_train_epochs=1,
            reset_network=True,
            confidence_threshold=1,
            confidence_limit=4,
            count_ratio=0.8,
            replay_buffer_n_games=50,
            use_neptune=True,
            tags=['local-run'],
            source_files=[__file__, PARAMS_FILE])

    if CLUSTER:
        trainer.run_training_games_multi_process(
            opponent_to_train='self',
            baselines=[
                RandomAgent(distribution='first_buy'),
                GreedyAgentBoost()
            ],
            epochs=250,
            mcts_passes=50,
            n_test_games=24,
            exploration_ceofficient=0.41,
            experiment_name='MCTS with NN',
            weights_path=
            '/net/archive/groups/plggluna/plgtodrzygozdz/weights_temp/',
            neural_network_train_epochs=1,
            reset_network=True,
            confidence_threshold=1,
            confidence_limit=4,
            count_ratio=0.7,
            replay_buffer_n_games=100,
            use_neptune=True,
            tags=['cluster-run'],
            source_files=[__file__, PARAMS_FILE])
Example #7
def main(episode_count):
    env = gym.make('CartPole-v0')
    agent = RandomAgent(env.action_space.n)
    for i in range(episode_count):
        observation = env.reset()  # initialize the environment
        done = False
        step = 0
        while not done:
            env.render()
            action = agent.act(observation)
            next_observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(step + 1))
            observation = next_observation
            step += 1
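
The RandomAgent used in Example #7 is likewise not shown. A minimal hypothetical sketch matching its call sites (the constructor takes env.action_space.n and act() ignores the observation) might be:

import random


class RandomAgent:
    """Hypothetical minimal agent for Example #7, not the original class."""

    def __init__(self, n_actions):
        self.n_actions = n_actions

    def act(self, observation):
        # the observation is ignored; pick a uniformly random discrete action
        return random.randrange(self.n_actions)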
Example #8
 def simulate(self, node: MCSTTreeNode):
     tmp_game = copy.deepcopy(node.game)
     tmp_game.debug = False
     tmp_agents = [RandomAgent(label) for label in tmp_game.labels]
     tmp_game.play(tmp_agents)
     winner = tmp_game.evaluate()
     return self.winner2score(winner, tmp_game.moves_num)
Example #9
def train_agent(hype_space):

    print("----------------------")
    print("Evaluating model: ", hype_space)

    logger = BriscolaLogger(BriscolaLogger.LoggerLevels.TEST)
    game = brisc.BriscolaGame(2, logger)

    tf.reset_default_graph()

    # Initialize agents
    agents = []
    agent = QAgent(
        0,
        hype_space['epsilon_increment'],
        hype_space['epsilon_max'],
        hype_space['discount'],
        NETWORK,
        hype_space['layers'],
        hype_space['learning_rate'],
        hype_space['replace_target_iter'])

    agents.append(agent)
    agents.append(RandomAgent())

    best_total_wins = train(game, agents, NUM_EPOCHS, EVALUATE_EVERY, EVALUATE_FOR, MODEL_DIR)

    print ("Best total wins ----->", best_total_wins)
    best_total_loses = EVALUATE_FOR - best_total_wins
    return best_total_loses
Example #10
def create_agent(conf, action_space, observation_space):
    if conf['agent'] == "dqn":
        return DQNAgent(
            action_space,
            observation_space,
            batch_size=conf['batch_size'],
            learning_rate=conf['learning_rate'],
            discount=conf['discount'],
            epsilon=conf['random_explore'])
    elif conf['agent'] == "conv_dqn":
        return ConvDQNAgent(
            action_space,
            observation_space,
            batch_size=conf['batch_size'],
            learning_rate=conf['learning_rate'],
            discount=conf['discount'],
            epsilon=conf['random_explore'])
    elif conf['agent'] == "tabular_q":
        return TabularQAgent(
            action_space,
            observation_space,
            q_init=conf['q_value_init'],
            learning_rate=conf['learning_rate'],
            discount=conf['discount'],
            epsilon=conf['random_explore'])
    elif conf['agent'] == "random":
        return RandomAgent(action_space, observation_space)
    else:
        raise ArgumentError("Agent type [%s] is not supported." %
                            conf['agent'])
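
For reference, a hypothetical conf dict that exercises create_agent; the keys are exactly the ones read in the branches above, while the values and the environment are placeholders:

import gym

conf = {
    'agent': 'tabular_q',
    'q_value_init': 0.0,
    'learning_rate': 0.1,
    'discount': 0.99,
    'random_explore': 0.05,
}
env = gym.make('FrozenLake-v0')  # placeholder environment
agent = create_agent(conf, env.action_space, env.observation_space)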
Example #11
def run():
    agent1 = RandomAgent(mpi_communicator=comm)
    agent2 = RandomAgent(mpi_communicator=comm)
    #
    agent3 = MultiMCTSAgent(1, 5, True, False)

    # random.randint.seed(100)
    arek = DeterministicMultiProcessArena()
    result = arek.run_one_duel_multi_process_deterministic(
        comm, [agent3, agent1])
    result2 = arek.run_one_duel_multi_process_deterministic(
        comm, [agent3, agent1])

    if main_process:
        print(result)
        print(result2)
Example #12
def main():
    parser = setup_parser()
    args = parser.parse_args()
    env = gym.make(args.env)
    model_type = args.model

    if not model_type:
        raise ValueError("Please specify the model")

    model_config, train_config, load_config = get_configs(args)

    if model_type == "pg":
        model = PolicyGradient(env, **model_config)
    elif model_type == "ac":
        model = ActorCritic(env, **model_config)
    elif model_type == "gae":
        model = GeneralizedAdvantageEstimation(env, **model_config)
    elif model_type == "rnd":
        model = RandomAgent(env, **model_config)

    if args.load:
        model.load_model(**load_config)
    if args.train:
        reward_history, loss = model.train(**train_config)
        plot_rewards(reward_history)
    if args.evaluate:
        evaluation_results = model.evaluate(n_episodes=10, n_steps=1000, render=args.render)
        plot_rewards(evaluation_results)
Example #13
def produce_data(when_to_start, dump_p, n_games, filename, folder):
    list_of_agents = [RandomAgent(), GreedyAgentBoost(), MinMaxAgent()]

    arek = ArenaMultiThread()
    arek.start_collecting_states()
    arek.collect_only_from_middle_game(when_to_start, dump_p)
    arek.all_vs_all('deterministic', list_of_agents, n_games)
    arek.dump_collected_states(filename, folder)
Example #14
 def __init__(self, mode, iteration_limit, rollout_repetition, choose_best):
     assert mode == 'dqn', 'You must provide mode of training'
     self.iteration_limit = iteration_limit
     self.rollout_repetition = rollout_repetition
     self.data_collector = TreeDataCollector()
     self.opponent = RandomAgent(distribution='first_buy')
     self.choose_best = choose_best
     self.env = gym_open_ai.make('splendor-v0')
Example #15
 def set_agent(self, agent: Agent = None):
     # todo agent verify methods availability
     if not agent:
         random_agent = RandomAgent()
         self.agent = random_agent
     else:
         self.agent = agent
         self.agent.connect_player(self)
     print(f'Player {self.id} using {self.agent.__class__.__name__}')
Example #16
def main():
    env = TicTacToeEnv()
    model = ValueModel(env.feature_vector_size, 100)

    # agent = SimpleAgent('agent_0', model, env)
    # agent = TDAgent('agent_0', model, env)
    # agent = ForwardAgent('agent_0', model, env)
    # agent = BackwardAgent('agent_0', model, env)
    agent = LeafAgent('agent_0', model, env)

    random_agent = RandomAgent(env)

    log_dir = "./log/leaf"

    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(log_dir)

    scaffold = tf.train.Scaffold(summary_op=summary_op)
    with tf.train.MonitoredTrainingSession(checkpoint_dir=log_dir,
                                           scaffold=scaffold) as sess:
        agent.sess = sess
        env.sess = sess

        while True:
            episode_count = sess.run(agent.episode_count)
            if episode_count % 1000 == 0:
                results = random_agent.test(agent)

                sess.run(agent.update_random_agent_test_results,
                         feed_dict={
                             random_agent_test_: result
                             for random_agent_test_, result in zip(
                                 agent.random_agent_test_s, results)
                         })
                print(episode_count, ':', results)

                if results[2] + results[5] == 0:
                    final_summary = sess.run(summary_op)
                    summary_writer.add_summary(final_summary,
                                               global_step=episode_count)
                    break
            else:
                agent.train(.2)
            sess.run(agent.increment_episode_count)
Example #17
    def __init__(self):

        # Board dimension
        self.nb_rows = 6
        self.nb_columns = 7
        self.done = False

        # nb_empty indicates the number of available spaces per column
        self.nb_empty = [self.nb_rows] * self.nb_columns

        # Save the board state
        self.state = np.zeros((self.nb_rows, self.nb_columns), dtype=int)

        # Learn about spaces here: http://gym.openai.com/docs/#spaces
        self.action_space = spaces.Discrete(self.nb_columns)
        self.observation_space = spaces.Box(low=-1,
                                            high=1,
                                            shape=(self.nb_rows,
                                                   self.nb_columns),
                                            dtype=np.int)

        # Tuple corresponding to the min and max possible rewards
        self.reward_range = (-10, 1)
        self.rewards = {
            "invalid": -10,
            "valid": 1 / 42,
            "won": 1,
            "lost": -1,
            "draw": 0,
        }

        # Render properties
        self.render_tokens = {}
        self.render_tokens[-1] = 'x'
        self.render_tokens[1] = 'o'
        self.render_tokens[0] = ' '

        # Random agent
        self.opponent = RandomAgent(self.action_space, self.state)

        # StableBaselines throws error if these are not defined
        self.spec = None
        self.metadata = None
Example #18
def objective(trial):
    env = CurlingEnv(hard_mode=True)
    """
    agent1 = TDZero(str(random.randint(0, 100)),
                        training_mode=True,
                        alpha=trial.suggest_float('alpha', 0.1, 1.0),
                        gamma=trial.suggest_float('gamma', 0.1, 1.0),
                        epsilon=trial.suggest_float('epsilon', 0.9, 1.0),
                        decay_rate=trial.suggest_float('decay_rate', 0.9, 0.99999))
    """
    """
    agent1 = MonteCarlo("Monte Carlo",
                        training_mode=True,
                        action_space=env.action_space.n,
                        gamma=trial.suggest_float('gamma', 0.1, 1.0),
                        epsilon=trial.suggest_float('epsilon', 0.1, 1.0),
                        decay_rate=trial.suggest_float('decay_rate', 0.5, 0.9999))
    """
    agent1 = ActorCritic("Actor Critic",
                         training_mode=True,
                         action_space=env.action_space.n,
                         actor_lr=trial.suggest_float("actor_lr", 0.0001, 0.3),
                         critic_lr=trial.suggest_float("critic_lr", 0.0001,
                                                       0.3),
                         gamma=trial.suggest_float("gamma", 0.1, 0.9))
    agent2 = RandomAgent("Random", False, env.action_space.n)

    wins = []
    rolling_average = []

    for _ in tqdm(range(1000)):
        state = env.reset()
        coordinator = PlayerCoordinator(agent1, agent2, state)
        coordinator.start_episode()
        done = False
        while not done:
            action = coordinator.next_move(state)
            state, reward, done, _ = env.step(action)
            coordinator.inform_players(state, action, reward, done)
            coordinator.next_turn()

            if done:
                if reward[0] > reward[1]:
                    wins.append(1)
                else:
                    wins.append(0)

        coordinator.end_episode()
        if len(wins) > 100:
            rolling_average.append(np.mean(wins[-100:]))

    score = np.mean(rolling_average[-5000:])
    return score
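
objective() above follows the Optuna convention of taking a trial and returning a score to maximize; a minimal driver (the trial count is arbitrary) would be:

import optuna

study = optuna.create_study(direction='maximize')  # higher rolling win rate is better
study.optimize(objective, n_trials=50)
print(study.best_params, study.best_value)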
Example #19
def run(env_name, agent_name, nb_episodes, render_freq, render_mode):
    logger.set_level(logger.INFO)

    env = gym.make(env_name)

    # You provide the directory to write to (can be an existing
    # directory, including one with existing data -- all monitor files
    # will be namespaced). You can also dump to a tempdir if you'd
    # like: tempfile.mkdtemp().
    #outdir = '/tmp/random-agent-results'
    #video_callable = None if render_mode == 'human' else False

    #env = wrappers.Monitor(env, directory=outdir, force=True, video_callable=video_callable)
    #env = DynamicMonitor(env, directory=outdir, force=True, video_callable=video_callable)

    env.render(mode=render_mode)
    env.seed(0)

    if agent_name == 'RandomAgent':
        agent = RandomAgent(env.env.action_space)
    elif agent_name == 'EpsilonGreedyAgent':
        agent = EpsilonGreedy(env.env.action_space)
    elif agent_name == 'GradientBanditAgent':
        agent = GradientBandit(env.env.action_space)
    elif agent_name == 'ucb':
        agent = ucb(env.env.action_space)
    elif agent_name == 'ThompsonSampling':
        agent = ThompsonSampling(env.env.action_space)

    step = 0
    reward = 0
    done = False

    for episode in range(nb_episodes):
        print(f'--------- Episode {episode} ---------')
        ob = env.reset()
        agent = agent.reset()
        while True:
            step += 1
            # action space may have changed
            # agent = EpsilonGreedy(env.env.action_space)
            action = agent.act(ob, reward, done)
            ob, reward, done, _ = env.step(action)
            if done:
                break
            if step % render_freq == 0:
                env.render()
            # Note there's no env.render() here. But the environment still can open window and
            # render if asked by env.monitor: it calls env.render('rgb_array') to record video.
            # Video is not recorded every episode, see capped_cubic_video_schedule for details.

    # Close the env and write monitor result info to disk
    env.env.close()
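
None of the bandit agents in Example #19 are shown. A hypothetical RandomAgent matching the interface the loop assumes (note that reset() must return an agent, since the code does agent = agent.reset()) could be:

class RandomAgent:
    """Hypothetical sketch of the interface used in Example #19."""

    def __init__(self, action_space):
        self.action_space = action_space

    def reset(self):
        # the caller rebinds: agent = agent.reset()
        return self

    def act(self, observation, reward, done):
        # a random agent ignores all feedback
        return self.action_space.sample()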
Example #20
    def add_player(self, name, player_type, params={}):

        if name in self._players:
            print(f"Player '{name}' already exists.")

        player = [player_type]

        if player_type == "minimax":
            player.append(MiniMaxAgent(self, params))
        elif player_type == "deep-q":
            player.append(DeepQAgent(self, params))
        elif player_type == "random":
            player.append(RandomAgent(self, params))

        self._players[name] = player
        self._board.add_player(name)
Example #21
class RandomizedAgent(Agent):
    def __init__(self, epsilon):
        super().__init__()
        self.smart_agent = GreedySearchAgent()
        self.epsilon = epsilon
        self.random_agent = RandomAgent(distribution='uniform')

    def choose_action(self, observation: SplendorObservation,
                      previous_actions: List[Action]):
        p = np.random.uniform(0, 1)
        if p < self.epsilon:
            return self.random_agent.choose_action(observation,
                                                   previous_actions)
        else:
            return self.smart_agent.choose_action(observation,
                                                  previous_actions)
Example #22
def _make_frames(set_: Set, n: int):
    e = gym_super_mario_bros.make("SuperMarioBros-1-1-v0")
    e = JoypadSpace(e, RIGHT_ONLY)
    e = SkipWrapper(e, 5)
    e = CopyFrame(e)
    e = FrameStack(e, 4)
    e = NoopResetEnv(e, 4)
    a = RandomAgent(env=e)
    play(
        a,
        e,
        frames_directory=set_.path / "img",
        display=False,
        n=n * 10,
        save_each=15,
        state2img=lambda frames: cv2.cvtColor(np.vstack(frames._frames), cv2.COLOR_RGB2BGR),
    )
Example #23
def main(argv=None):

    # Initializing the environment
    logger = BriscolaLogger(BriscolaLogger.LoggerLevels.TRAIN)
    game = brisc.BriscolaGame(2, logger)

    # Initialize agents
    agents = []
    agent = QAgent(FLAGS.epsilon, FLAGS.epsilon_increment, FLAGS.epsilon_max,
                   FLAGS.discount, FLAGS.network, FLAGS.layers,
                   FLAGS.learning_rate, FLAGS.replace_target_iter,
                   FLAGS.batch_size)
    agents.append(agent)
    agent = RandomAgent()
    agents.append(agent)

    train(game, agents, FLAGS.num_epochs, FLAGS.evaluate_every,
          FLAGS.num_evaluations, FLAGS.model_dir)
Example #24
    def full_training(self, n_repetitions, alpha, epochs):

        self.prepare_training()
        for i in range(n_repetitions):
            if main_process:
                print('Game number = {}'.format(i))
            self.run_self_play('deterministic', alpha=alpha, epochs=epochs)
            agent_to_test = self.mcts_agent
            arena = MultiArena()
            results = arena.run_many_duels(
                'deterministic',
                [agent_to_test,
                 RandomAgent(distribution='first_buy')], 1, 24)
            if main_process:
                self.eval_policy.model.save_weights(
                    'Weights_i = {}.h5'.format(i))
                text_file = open("Results_{}.txt".format(i), "w")
                text_file.write(results.__repr__())
                text_file.close()
Example #25
def run_comparison(n_games=1000):

    gohan = GreedyAgentBoost()

    goku = RandomAgent(distribution='uniform')
    print(
        fight_pit.run_many_duels([goku, gohan],
                                 number_of_games=n_games,
                                 shuffle_agents=True))

    goku = RandomAgent(distribution='uniform_on_types')
    print(
        fight_pit.run_many_duels([goku, gohan],
                                 number_of_games=n_games,
                                 shuffle_agents=True))

    goku = RandomAgent(distribution='first_buy')
    print(
        fight_pit.run_many_duels([goku, gohan],
                                 number_of_games=n_games,
                                 shuffle_agents=True))

    gohan = GreedyAgentBoost(weight=[100, 2.5, 1.5, 1, 0.1])

    goku = RandomAgent(distribution='uniform')
    print(
        fight_pit.run_many_duels([goku, gohan],
                                 number_of_games=n_games,
                                 shuffle_agents=True))

    goku = RandomAgent(distribution='uniform_on_types')
    print(
        fight_pit.run_many_duels([goku, gohan],
                                 number_of_games=n_games,
                                 shuffle_agents=True))

    goku = RandomAgent(distribution='first_buy')
    print(
        fight_pit.run_many_duels([goku, gohan],
                                 number_of_games=n_games,
                                 shuffle_agents=True))
Example #26
from agents.random_agent import RandomAgent
from agents.greedy_agent import GreedyAgent, GreedyAgentBoost
from arena import Arena
import time
import random
import numpy as np

fight_pit = Arena()

goku = RandomAgent(distribution='first_buy')
#gohan = RandomAgent(distribution='uniform_on_types')
#gohan = RandomAgent(distribution='uniform')
gohan = GreedyAgent(weight=0.08)

g1 = GreedyAgentBoost("Greedy1", [100, 2, 2, 1, 0.1])
g2 = GreedyAgentBoost("Greedy2", [0, 0, 0, 0, 0])
g3 = GreedyAgentBoost("Greedy3", [10, 2, 2, 1, 0.2])
g4 = GreedyAgentBoost("Greedy4", [100, 0, 0, 1, 0.1])
g5 = GreedyAgentBoost(
    "Greedy5", [0.99954913, 0.01997425, 0.02001405, 0.01004779, 0.00101971])
g6 = GreedyAgentBoost(
    "Greedy6", [0.99953495, 0.02010871, 0.02010487, 0.01095619, 0.00113329])

gv1 = RandomAgent(distribution='first_buy')
gv2 = GreedyAgent(weight=0.1)
gv3 = GreedyAgentBoost("RandomAgent", [0, 0, 0, 0, 0])

g_list = {g1, g2, g3, g4, g5, g6}
gv_list = [gv1, gv2, gv3]
g_list_remove = set()
lr = 0.000005
Example #27
   def __init__(self,
                gems_encoder_dim : int = None,
                price_encoder_dim : int = None,
                profit_encoder_dim : int = None,
                cards_points_dim: int = None,
                cards_dense1_dim: int = None,
                cards_dense2_dim: int = None,
                board_nobles_dense1_dim : int = None,
                board_nobles_dense2_dim : int = None,
                full_board_dense1_dim: int = None,
                full_board_dense2_dim: int = None,
                player_points_dim: int = None,
                player_nobles_dim: int = None,
                full_player_dense1_dim: int = None,
                full_player_dense2_dim: int = None,
                final_layer= None,
                data_transformer = None,
                network_name: str = None
                ):
       super().__init__()
       self.vectorizer = Vectorizer()
       self.final_layer = final_layer
       self.data_transformer = data_transformer

       self.params['data transformation'] = self.data_transformer.name
       self.params['final layer name'] = self.final_layer.name
       self.params['gems_encoder_dim'] = gems_encoder_dim
       self.params['price_encoder_dim'] = price_encoder_dim
       self.params['profit_encoder_dim'] = profit_encoder_dim
       self.params['cards_points_dim'] = cards_points_dim
       self.params['cards_dense1_dim'] = cards_dense1_dim
       self.params['cards_dense2_dim'] = cards_dense2_dim
       self.params['board_nobles_dense1_dim'] = board_nobles_dense1_dim
       self.params['board_nobles_dense2_dim'] = board_nobles_dense2_dim
       self.params['full_board_dense1_dim']= full_board_dense1_dim
       self.params['full_board_dense2_dim'] = full_board_dense2_dim
       self.params['player_points_dim'] = player_points_dim
       self.params['player_nobles_dim'] = player_nobles_dim
       self.params['full_player_dense1_dim'] = full_player_dense1_dim
       self.params['full_player_dense2_dim']= full_player_dense2_dim


       self.arena = Arena()
       self.network_agent = ValueNNAgent(self)
       self.easy_opp = RandomAgent(distribution='first_buy')
       self.medium_opp = GreedyAgentBoost()
       self.hard_opp = MinMaxAgent()


       self.neptune_monitor = NeptuneMonitor()
       self.network_name = network_name

       self.gems_encoder = GemsEncoder(gems_encoder_dim)
       self.price_encoder = PriceEncoder(price_encoder_dim)
       self.board_encoder = BoardEncoder(self.gems_encoder,
                                          ManyNoblesEncoder(price_encoder_dim,
                                                            board_nobles_dense1_dim,
                                                            board_nobles_dense2_dim),
                                          ManyCardsEncoder(MAX_CARDS_ON_BORD,
                                                           profit_encoder_dim,
                                                           price_encoder_dim,
                                                           cards_points_dim,
                                                           cards_dense1_dim,
                                                           cards_dense2_dim
                                                           ),
                                          full_board_dense1_dim,
                                          full_board_dense2_dim)
       self.player_encoder = PlayerEncoder(self.gems_encoder,
                                            self.price_encoder,
                                            ManyCardsEncoder(MAX_RESERVED_CARDS,
                                                             profit_encoder_dim,
                                                             price_encoder_dim,
                                                             cards_points_dim,
                                                             cards_dense1_dim,
                                                             cards_dense2_dim
                                                             ),
                                            player_points_dim,
                                            player_nobles_dim,
                                            full_player_dense1_dim,
                                            full_player_dense2_dim)
       active_player_input = PlayersInputGenerator('active_').inputs
       other_player_input = PlayersInputGenerator('other_').inputs
       board_input = self.board_encoder.inputs
       self.inputs = board_input + active_player_input + other_player_input
       board_encoded = self.board_encoder(board_input)
       active_player_encoded = self.player_encoder(active_player_input)
       other_player_encoded = self.player_encoder(other_player_input)
       full_state = Concatenate(axis=-1)([board_encoded, active_player_encoded, other_player_encoded])
       full_state = Dense(full_player_dense1_dim, activation='relu')(full_state)
       final_state = Dense(full_player_dense2_dim, activation='relu')(full_state)
       result = self.final_layer(final_state)
       self.layer = Model(inputs = self.inputs, outputs = final_state, name = 'full_state_splendor_estimator')
       self.network = Model(inputs = self.inputs, outputs = result, name = 'full_state_splendor_estimator')
       self.network.compile(Adam(), loss='mean_squared_error')
       self.params['Model name'] = 'Average pooling model'
       self.params['optimizer_name'] = 'Adam'
Example #28
            break
        reward, terminate = s.step(agents[agent].get_move(s), agent)
        if visualise:
            print(s)
        if terminate:
            if stop_point == -1:
                stop_point = agent
        agent = (agent + 1) % len(agents)
    if visualise:
        print("Game Over!\n")

    best_score = -math.inf
    best_agent = None

    for agent in agents:
        if visualise:
            print(str(agent) + " ended with " + str(s.scores[agent.index]))
        if s.scores[agent.index] > best_score:
            best_score = s.scores[agent.index]
            best_agent = agent

    if visualise:
        print(str(best_agent) + " wins!")
    return {agent: score for agent, score in zip(agents, s.scores)}


if __name__ == "__main__":
    play_game(agents=[OneLookAheadAgent(0, 2),
                      RandomAgent(1, 2)],
              visualise=True)
Example #29
import gym
import gym_connect4
from agents.random_agent import RandomAgent

if __name__ == "__main__":
    # Build environment
    print("[.] Build Environment")
    env = gym.make('gym_connect4:connect4-v0')

    # Create random agent
    print("[.] Create Random Agent")
    agent = RandomAgent(env.action_space, env.observation_space)

    print(env.action_space.n)

    # Init environment
    done = False
    obs = env.reset()

    # Run game
    print("[.] Running game")
    while not done:
        obs, reward, done, info = env.step(agent.get_action(obs))

    # Final render
    print("[+] Done.")
    print("Infos: ", info)
    print("Final board: ")
    env.render()

    # Close environment
    env.close()
Example #30
import gin

from gym_splendor_code.envs.mechanics.abstract_observation import DeterministicObservation
from gym_splendor_code.envs.mechanics.state import State
from nn_models.architectures.average_pool_v0 import StateEncoder, ValueRegressor, IdentityTransformer
gin.parse_config_file(
    '/home/tomasz/ML_Research/splendor/gym-splendor/experiments/MCTS_series_1/params.gin'
)

from agents.random_agent import RandomAgent
from agents.single_mcts_agent import SingleMCTSAgent
from arena.arena import Arena
from monte_carlo_tree_search.evaluation_policies.value_evaluator_nn import ValueEvaluator
from monte_carlo_tree_search.mcts_algorithms.single_process.single_mcts import SingleMCTS

arek = Arena()

a1 = RandomAgent()
a2 = SingleMCTSAgent(5, ValueEvaluator(), 0.6, True, True)
#
results = arek.run_one_duel('deterministic', [a1, a2])

# state1 = State()
# fufu = SingleMCTS(5, 0.6,  ValueEvaluator())
# fufu.create_root(DeterministicObservation(state1))
# fufu.run_mcts_pass()
Example #31
def main():

    agent1 = RandomAgent(Reversi())
    agent1.name = "1"
    agent1.color = BLACK

    agent2 = RandomMonteCarloAgent(Reversi(), 1)
    agent2.name = "2"
    agent2.color = WHITE

    engine_ref = Reversi()
    agent_ref = EdaxAgent(engine_ref)

    iteration = 1000

    while True:
        predict_correct = 0
        predict_wrong = 0
        total = 1000

        #file_name = 'training/saved_conv_networks/reversi-nn-' + str(iteration)
        #while not os.path.isfile(file_name):
        #    time.sleep(60)
        #time.sleep(1)
        #agent2.load_nn(file_name)

        for _ in xrange(total):
            agent1.reset_engine()
            agent2.reset_engine()
            agent_ref.reset_engine()

            current_agent = agent1
            opponent_agent = agent2

            ignore_moves = 6

            while not current_agent.get_engine().is_full():
                move = current_agent.get_best_move()
                assert current_agent.color != current_agent.get_engine().get_current_player()
                if move is None:
                    #print('No move for ', current_agent.name)
                    tmp_engine = copy.deepcopy(current_agent.get_engine())
                    if not tmp_engine.get_legal_moves():
                        tmp_engine.apply_move(None)
                        if not tmp_engine.get_legal_moves():
                            # game finished
                            break
                    
                if ignore_moves <= 0 and (not (move is None)) and current_agent == agent2:
                    move_ref = agent_ref.get_best_move_without_apply()
                    print(str(move_ref) + ', ' + str(move))
                    if move_ref == move:
                        predict_correct += 1
                    else:
                        predict_wrong += 1

                if ignore_moves > 0:
                    ignore_moves -= 1

                agent_ref.apply_opponent_move(move)
                opponent_agent.apply_opponent_move(move)
                assert opponent_agent.color == opponent_agent.get_engine().get_current_player()
                if current_agent is agent1:
                    current_agent = agent2
                    opponent_agent = agent1
                else:
                    current_agent = agent1
                    opponent_agent = agent2
            print('result: ' + str(predict_correct) + ', ' + str(predict_wrong))

        print('correct: ' + str(float(predict_correct) / (predict_correct + predict_wrong)))

        iteration += 1000