Example #1
 def __init__(self, flags):
     """Initialize runner."""
     self.flags = flags
     self.agent_config = {'players': flags['players']}
     self.environment = rl_env.make('Hanabi-Full',
                                    num_players=flags['players'])
     self.agent_class = AGENT_CLASSES[flags['agent_class']]
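Not part of the original example: a minimal sketch of how a runner like this is typically driven, assuming the standard hanabi-learning-environment rl_env API (env.reset() returns a dict with 'player_observations' and 'current_player', env.step() returns the usual (observations, reward, done, info) tuple) and that the agent class follows the act(observation) convention used by the repo's example agents. run_episode is a hypothetical helper, not a method from the original class.

 def run_episode(self):
     """Play one self-play episode and return the accumulated reward."""
     # One agent instance per seat, all built from the same config.
     agents = [self.agent_class(self.agent_config)
               for _ in range(self.flags['players'])]
     observations = self.environment.reset()
     done = False
     total_reward = 0
     while not done:
         current = observations['current_player']
         # Each agent only sees its own (partial) observation.
         action = agents[current].act(
             observations['player_observations'][current])
         observations, reward, done, _ = self.environment.step(action)
         total_reward += reward
     return total_reward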
Example #2
 def __init__(self, flags):
     """Initialize runner."""
     self.flags = flags
     self.agent_config = {'players': flags['players']}
     self.environment = rl_env.make('Hanabi-Full',
                                    num_players=flags['players'])
     self.agent_class = AGENT_CLASSES[flags['agent_class']]
     self.game_state_wrappers = list()
     self.v = vec.ObservationVectorizer(self.environment)
Example #3
 def __init__(self, flags):
     """Initialize runner."""
     self.flags = flags
     self.env = rl_env.make('Hanabi-Full', num_players=flags['players'])
     self.agent_config = {
         'players': flags['players'],
         'num_moves': self.env.num_moves(),
         'observation_size': self.env.vectorized_observation_shape()[0]
     }
     self.agent_class = AGENT_CLASSES[flags['agent_class']]
Example #4
 def __init__(self, flags):
     """Initialize runner."""
     self.flags = flags
     self.agent_config = {
         'players': flags['players'],
         'player_id': 0,
         'mcts_types': flags['mcts_types']
     }
     self.environment = make('Hanabi-Full', num_players=flags['players'])
     self.agent_classes = [
         AGENT_CLASSES[agent_class]
         for agent_class in flags['agent_classes']
     ]
Example #5
def create_environment(game_type='Hanabi-Small', num_players=4):
    """Creates the Hanabi environment.

    Args:
      game_type: Type of game to play. Currently the following are supported:
        Hanabi-Full: Regular game.
        Hanabi-Small: The small version of Hanabi, with 2 cards and 2 colours.
      num_players: Int, number of players to play this game.

    Returns:
      A Hanabi environment.
    """
    return rl_env.make(
        environment_name=game_type, num_players=num_players, pyhanabi_path=None)
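A small usage sketch (not from the original source), just to show how the helper above is typically exercised with the standard rl_env environment API:

# Hypothetical usage of create_environment(); the keys below ('player_observations',
# 'current_player', 'legal_moves') are the ones exposed by rl_env observations.
env = create_environment(game_type='Hanabi-Small', num_players=2)
observations = env.reset()
current = observations['current_player']
print('legal moves:', observations['player_observations'][current]['legal_moves'])
print('number of distinct moves:', env.num_moves())
print('observation vector length:', env.vectorized_observation_shape()[0])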
Example #6
def run(ix, initialize=False):

    # initialize env
    env = rl_env.make('Hanabi-Full', num_players=flags['players'])
    agent_config = {
        'players': flags['players'],
        'num_moves': env.num_moves(),
        'observation_size': env.vectorized_observation_shape()[0],
        'model_name': str(ix),
        'initialize': initialize
    }

    agent = NeuroEvoAgent(agent_config)

    avg_reward = 0
    avg_steps = 0

    for eps in range(flags['num_episodes']):
        obs = env.reset()  # Observation of all players
        done = False
        agent_id = 0

        while not done:
            ob = obs['player_observations'][agent_id]

            try:
                action = agent.act(ob)
            except ValueError:
                print('Something went wrong. Try to reinitialize the agents '
                      'pool by using --initialize True')
                exit()

            obs, reward, done, _ = env.step(action)

            avg_reward += reward
            avg_steps += 1

            if done:
                break

            # change player
            agent_id = (agent_id + 1) % flags['players']

    n_eps = float(flags['num_episodes'])
    avg_steps /= n_eps
    avg_reward /= n_eps

    agent.save(model_name=str(ix))
    scores[ix] = avg_reward * 1000 + avg_steps
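The function above depends on module-level flags and scores objects that are defined elsewhere in the original script; the driver below is purely hypothetical and only illustrates how run(ix) might be used to score a population of models:

# Hypothetical driver for run(); `flags` and `scores` stand in for the
# module-level objects used by the original script.
flags = {'players': 2, 'num_episodes': 10}
scores = {}

population_size = 8
for ix in range(population_size):
    # initialize=True builds each model from scratch before evaluating it.
    run(ix, initialize=True)
best = max(scores, key=scores.get)
print('best model:', best, 'fitness:', scores[best])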
Example #7
    def __init__(self, flags):
        """Initialize runner."""
        self.flags = flags
        self.env = rl_env.make('Hanabi-Full', num_players=flags['players'])

        # create configurations
        self.agent_config, self.agent_2_config = self.generate_config(flags)

        # use configurations to create agent
        self.agent = load_agent(flags['agent_class'])(self.agent_config)

        if flags['agent2_class'] != flags['agent_class']:
            # use configurations to create second agent
            self.agent2 = load_agent(flags['agent2_class'])(
                self.agent_2_config)
Example #8
 def __init__(self,
              args,
              game_type='Hanabi-Full',
              num_players=2,
              num_unique_agents=6,
              num_games=10):  # Changed from None to 10
     self.game_type = game_type
     self.num_players = num_players
     self.num_unique_agents = num_unique_agents
     self.num_games = num_games
     self.environment = rl_env.make(game_type, num_players=self.num_players)
     self.agent_config = {
         'players': self.num_players,
         'num_moves': self.environment.num_moves(),
         'observation_size':
         self.environment.vectorized_observation_shape()[0]
     }
     self.available_agents = import_agents(args.agentdir, num_unique_agents,
                                           self.agent_config)
Example #9
 def __init__(self, flags):
     """Initialize runner."""
     self.flags = flags
     self.agent_config = {'players': flags['players']}
     self.environment = rl_env.make('Hanabi-Full',
                                    num_players=flags['players'])
     self.agent_class = [
         SimpleAgent,
         RandomAgent,
         LossAverseAgent,
         lambda config: create_tf_agent(
             self.environment, 'Rainbow',
             'agents/rainbow/tmp/hanabi_rainbow/checkpoints'),
         lambda config: create_tf_agent(
             self.environment, 'DQN',
             'agents/rainbow/tmp/hanabi_dqn/checkpoints'),
         lambda config: create_tf_agent(self.environment, 'Rainbow',
                                        'agents/rainbow/tmp/pretrained/'),
         HeuristicAgent,
     ]
Example #10
    def __init__(self, agent_class, numAgents=-1, load=False, size=1000000):
        """
        Args:
            agent_class (string): the class of the agent, which can be one of:
                - 'SimpleAgent'
                - 'RainbowAgent'
                - 'RandomAgent'
            numAgents (int, optional): the number of agents
            load (boolean, optional): whether to load any existing data
                for the given class of agents.
            size (int, optional): how many steps are going to be saved;
                default is 1M. This size is used to allocate memory at the beginning.
        """

        self.size = size
        self.ptr = 0
        self.ep_start_id = self.ptr
        self.full = False
        self.path = os.path.join(self.path, agent_class)

        if not load and numAgents == -1:
            print(
                "Bad parameter initialization. Use either 'numAgents' or 'load' to initialize the object."
            )
            exit()
        else:
            if load:
                # load the configurations from file
                self.config = pickle.load(
                    open(os.path.join(self.path, "config.pickle"), "rb"))
                numAgents = self.config["numAgents"]

            else:
                self.config = {}  # create empty dict
                self.config["numAgents"] = numAgents  # insert config data

        try:
            # detect the size of the observations
            env = rl_env.make(num_players=numAgents)
            obs = env.reset()
            self.config["size_obs"] = len(
                obs['player_observations'][0]['vectorized'])

            # detect the size of move
            self.n_moves = env.num_moves()

            # initialize matrices for all values
            self.moves = np.empty(size, dtype=np.uint8)
            self.rs = np.empty(size)
            self.obs = np.empty((size, self.config["size_obs"]), dtype=bool)
            self.eps = []

            # initialize last episode
            self.last_ep = -1
        except BaseException:
            # if the environment can't be created, we can still load
            if numAgents == 2 or numAgents == 3:
                self.n_cards = 5
            elif numAgents == 4 or numAgents == 5:
                self.n_cards = 4
            else:
                print("ERROR: invalid number of players")
                return

            self.n_moves = numAgents * 10 + self.n_cards * 2

            print("WARNING: the environment could not be created.")
            print(
                "Some functionality may be compromised. You CAN still load data."
            )
Example #11
 def __init__(self, numAgents, numEpisodes):
     self.eps = numEpisodes
     self.players = numAgents
     self.env = rl_env.make(num_players=numAgents)
Example #12
def load_env(variant="Hanabi-Full", players=4):
    pyhanabi_env = rl_env.make(environment_name=variant, num_players=players)
    py_env = pyhanabi_env_wrapper.PyhanabiEnvWrapper(pyhanabi_env)

    return py_env
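A quick smoke test (not in the original) for load_env, assuming PyhanabiEnvWrapper implements the standard TF-Agents PyEnvironment interface:

# Hypothetical check that the wrapped environment exposes the usual
# TF-Agents PyEnvironment surface; validate_py_environment drives a few
# random episodes and raises if the specs and time steps are inconsistent.
from tf_agents.environments import utils

env = load_env(variant='Hanabi-Full', players=4)
print('action spec:', env.action_spec())
print('observation spec:', env.observation_spec())
utils.validate_py_environment(env, episodes=1)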
Example #13
def main(args):
    """ Observations & actions generation.

    Generate binary observations & one-hot encoded action vectors based on game
    logs from running WTFWT agent.

    Observations are saved in the following format:

               turn 0   ...  turn n        turn 0   ...  turn n
    Game 0   [[[obs_0], ..., [obs_n]],    [[act_0], ..., [act_n]],
    Game 1    [[obs_0], ..., [obs_n]],    [[act_0], ..., [act_n]],
      ...
    Game m    [[obs_0], ..., [obs_n]],    [[act_0], ..., [act_n]]]

    Arguments:
        - args: Namespace
            Arguments taken from command line. To see details, run
            python3 create_WTFWT_data.py --help
    Raises:
        - AssertionError for mismatches between WTFWT and the DM HanabiEnv
        - ValueError when parsing unknown formats.
    """
    print('Seed used: %d' % args.seed)
    # Handled by build_env.sh
    # Make hanabi_env & import it
    # run('(cd {}/ && cmake -Wno-dev . && make)'.format(PATH_HANABI_ENV), args.q)
    import rl_env

    random.seed(args.seed)
    combined_data = []
    # For specified number of games
    for i in range(args.num_games):
        game_data = [[], []]
        # Generate the game logs and decks
        s = random.randint(0, 2**31 - 1)  # seed for WTFWT
        cmd = ('cargo run -q --manifest-path {}/WTFWT/Cargo.toml -- -n 1 -o 1 '
               '-s {} -p {} -g info').format(PATH_ORIGINAL_AGENTS, s,
                                             args.num_players)
        debug = ['', ' -l debug'][args.debug]
        run(cmd + debug, args.q)

        with open('dk_cards.csv') as f_dk, open('rust_agent.csv') as f_log:
            reader = csv.reader(f_dk)
            dk = next(reader)[0].upper()
            # Deck in Rust Env starts from right and indexed from 1
            dk = [x[0] + str(int(x[1]) - 1) for x in dk.split('-')[::-1]]

            env = rl_env.make('Hanabi-Full', num_players=args.num_players)
            obs = env.reset(dk)

            header = (['pid', 'turn'] +
                      ['p%d_cards' % i for i in range(args.num_players)] + [
                          'discards', 'action', 'firework', 'rem_life',
                          'rem_info', 'rem_deck'
                      ])

            reader = csv.reader(f_log)
            # For each turn in a game
            for row in reader:
                row = dict(zip(header, row))
                if args.debug:
                    comp_test(env, row, obs, args)
                action = parse_action(row, args.num_players)
                # Store the data
                cur_obs = obs['player_observations'][obs['current_player']]
                vec_act = one_hot_vectorized_action(action, env.num_moves(),
                                                    cur_obs)
                game_data[0].append(b2int.convert(cur_obs['vectorized']))
                game_data[1].append(vec_act)
                # Advance the state
                obs, reward, done, info = env.step(action)
        assert (done is True)
        combined_data.append(game_data)

    savepath = os.path.join(
        args.savedir,
        'wtfwt_' + str(args.num_players) + '_' + str(args.num_games) + '.pkl')
    with open(savepath, 'wb') as f:
        pickle.dump(combined_data, f)
    os.remove('dk_cards.csv')
    os.remove('rust_agent.csv')
Example #14
def load_hanabi_env(env_name="Hanabi-Full", num_players=4):
  pyhanabi_env = rl_env.make(environment_name=env_name, num_players=num_players)
  py_env = pyhanabi_env_wrapper.PyhanabiEnvWrapper(pyhanabi_env)
  return py_env
Example #15
    def run(self):
        """Run episodes."""
        gin_files = ['agents/rainbow/configs/hanabi_rainbow_explicit.gin']
        run_experiment.load_gin_configs(gin_files, [])
        environment = rl_env.make('Hanabi-Full-CardKnowledge', num_players=2)
        #environment_name == "Hanabi-Full-CardKnowledge"):
        obs_stacker = run_experiment.create_obs_stacker(environment)
        agent = run_experiment.create_agent(
            environment, obs_stacker)  #verify it uses rainbow

        #get the checkpoint..
        base_dir = "agents/rainbow/data"
        checkpoint_file_prefix = "ckpt"
        checkpoint_dir = '{}/checkpoints'.format(base_dir)
        experiment_logger = logger.Logger('{}/logs'.format(base_dir))
        run_experiment.initialize_checkpointing(agent, experiment_logger,
                                                checkpoint_dir,
                                                checkpoint_file_prefix)

        obs_stacker.reset_stack()
        observations = environment.reset(
        )  # Full game observation, not to be passed to agents
        current_player, legal_moves, observation_vector = (
            run_experiment.parse_observations(observations,
                                              environment.num_moves(),
                                              obs_stacker))

        has_played = {current_player}
        action = agent.begin_episode(current_player, legal_moves,
                                     observation_vector)

        ### Stage-compliant printing related
        # observations["player_observations"] has an element for every player

        hands_list = get_list_cards(observations)
        pp(hands_list[0])
        pp(hands_list[1])
        hand_count = len(hands_list[0])
        step_list = []

        is_done = False

        reward_since_last_action = np.zeros(environment.players)
        score = 0
        while not is_done:
            pp("~~~~")
            # convert action into dict-like action
            lm = observations["player_observations"][current_player][
                "legal_moves"]
            lmi = observations["player_observations"][current_player][
                "legal_moves_as_int"]
            ind = 0
            action_unf = None
            for ind in range(0, len(lm)):
                if lmi[ind] == action:
                    action_unf = lm[ind]
            assert (action_unf is not None)

            ### Stage compliant printing
            #print(format_step(cpacopy, hand_count))
            ############ do the step (important!)
            observations, reward, is_done, _ = environment.step(action.item())
            pp(observations["current_player"], "doing", action_unf)
            i = 0
            for x in observations["player_observations"]:
                i += 1
                pp(i, "player")
                for k in x["observed_hands"]:
                    pret_list(k)
            cpa = action_unf["action_type"]
            cpacopy = action_unf.copy()
            if cpa == "PLAY" or cpa == "DISCARD":
                new_card = find_new_card(observations)
                pp("new_card:", new_card)

                cpacopy["new_card"] = new_card
            step_list.append(cpacopy)
            pp(observations["player_observations"][0]["fireworks"])

            # quit if done
            if is_done:
                fire = observations["player_observations"][0]["fireworks"]
                score = 0
                for x in fire:
                    score += fire[x]
                print("Score=", score, fire)
                break

            current_player, legal_moves, observation_vector = (
                run_experiment.parse_observations(observations,
                                                  environment.num_moves(),
                                                  obs_stacker))
            if current_player in has_played:
                action = agent.step(reward_since_last_action[current_player],
                                    current_player, legal_moves,
                                    observation_vector)
            else:
                # Each player begins the episode on their first turn (which may not be
                # the first move of the game).
                action = agent.begin_episode(current_player, legal_moves,
                                             observation_vector)
                has_played.add(current_player)
            #cur_ply = observation["current_player"]
            #observation = observations['player_observations'][cur_ply]
            #current_player_action = agent.act(observation)
            # Make an environment step.
            #print('Agent: {} action: {}'.format(observation['current_player'],
            #                                    current_player_action))
            ##hands_list = get_list_cards(observations)
        # for h in hands_list:
        #     i += 1
        #     print(i, format_hand(h))
        ######observations, reward, done, unused_info = environment.step(
        ########current_player_action)
        ### Finding the new card... ###
        ##cpa = current_player_action["action_type"]
        ##cpacopy = current_player_action.copy()
        ##if cpa == "PLAY" or cpa == "DISCARD":
        ##    cpacopy["new_card"] = find_new_card(observations)
        #print("new:",format_card(new_card))
        ### Stage compliant printing
        #print(format_step(cpacopy, hand_count))
        ##step_list.append(cpacopy)
        ###
        ###
        #print('Running episode: %d' % episode)
        #print('Max Reward: %.3f' % max(rewards))

        ### Stage compliant printing
        #pret_list(hands_list)
        s = ""
        for h in hands_list:
            s += format_hand(h) + "\n"
        for x in step_list:
            s += format_step(x, hand_count) + "\n"
        with open("test_{}".format(score), "w") as f:
            f.write(s)
        print(s)