Example #1
def get_action_costs():
    # Create a new environment, read the config and record the action costs
    env = make_environment('halite')
    action_costs = np.zeros((len(ACTION_MAPPING)))
    for k in ACTION_MAPPING:
        if ACTION_MAPPING[k] == CONVERT:
            action_costs[k] = env.configuration.convertCost
        elif ACTION_MAPPING[k] == SPAWN:
            action_costs[k] = env.configuration.spawnCost
        else:
            action_costs[k] = 0

    return action_costs
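A hedged usage sketch (not part of the original example; variable names are illustrative): the returned cost vector can be compared against the current halite budget to mask unaffordable actions, similar to how Example #6 below filters valid_sub_actions.

import numpy as np

action_costs = get_action_costs()
player_halite = 500                                  # hypothetical budget
affordable_ids = np.where(action_costs <= player_halite)[0]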
Example #2
def get_input_output_shapes(config):
    # Create a new environment, perform the preprocessing and record the shape
    env = make_environment('halite')
    env.reset(num_agents=config['num_agents_per_game'])
    env_configuration = env.configuration
    env_observation = env.state[0].observation
    obs_input = state_to_input(structured_env_obs(env_configuration,
                                                  env_observation,
                                                  active_id=0),
                               num_mirror_dim=config['num_mirror_dim'])
    num_actions = len(ACTION_MAPPING)

    return obs_input.shape, num_actions, config['num_q_functions']
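A minimal sketch of how the returned shapes might parameterize a model, assuming a Keras Q-network with one dense head per Q function (an assumption; the actual architecture is not shown in these examples).

import tensorflow as tf

obs_shape, num_actions, num_q_functions = get_input_output_shapes(config)
inputs = tf.keras.Input(shape=obs_shape)
flat = tf.keras.layers.Flatten()(inputs)
q_heads = [tf.keras.layers.Dense(num_actions)(flat)
           for _ in range(num_q_functions)]
model = tf.keras.Model(inputs, q_heads)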
Example #3
def record_videos(agent_path,
                  num_agents_per_game,
                  rng_action_seeds,
                  extension_override=None,
                  config_override_agents=None,
                  deterministic_games=False,
                  env_seed_deterministic=0,
                  deterministic_extension=None,
                  first_game_recording=None):
    print("Generating videos of iteration {}".format(agent_path))
    env_configuration = {"agentExec": "LOCAL"}
    if deterministic_games:
        env_configuration["randomSeed"] = env_seed_deterministic
    env = make_environment(
        "halite", configuration=env_configuration
    )  #, configuration={"agentTimeout": 10000, "actTimeout": 10000})
    config = load_configs([agent_path])[0]
    env_configuration = env.configuration

    def my_agent(observation, config_id):
        config = AGENT_CONFIGS[config_id]
        rng_action_seed = rng_action_seeds[config_id]
        active_id = observation.player
        current_observation = utils.structured_env_obs(env_configuration,
                                                       observation, active_id)
        player_obs = observation.players[active_id]

        mapped_actions, _, _ = get_config_or_callable_actions(
            config, current_observation, player_obs, observation,
            env_configuration, rng_action_seed)

        return mapped_actions

    if config_override_agents is None:
        AGENT_CONFIGS = []
        for i in range(num_agents_per_game):
            AGENT_CONFIGS.append(sample_from_config(config))
    else:
        AGENT_CONFIGS = [
            sample_from_config_or_path(p, return_callable=True)
            for p in config_override_agents
        ]

    # This needs to be spelled out verbosely: building the lambdas in a list
    # comprehension late-binds config_id, so every agent would end up using the
    # same (last) config and the per-agent stochasticity would be lost.
    config_id_agents = [
        lambda observation: my_agent(observation, 0),
        lambda observation: my_agent(observation, 1),
        lambda observation: my_agent(observation, 2),
        lambda observation: my_agent(observation, 3),
    ][:num_agents_per_game]

    for video_type in ["self play", "random opponent"][:1]:
        original_video_type = video_type
        if config_override_agents is not None and video_type == "self play":
            video_type = "; ".join([
                a.rsplit('/', 1)[-1][:-3] for a in config_override_agents[1:]
            ])

        if original_video_type == "self play" and first_game_recording is not None:
            game_recording = first_game_recording
        else:
            env.reset(num_agents=num_agents_per_game)
            if original_video_type == "self play":
                agents = config_id_agents
            else:
                agents = [config_id_agents[0]] + ["random"] * (
                    num_agents_per_game - 1)

            env.run(agents)
            game_recording = env.render(mode="html", width=800, height=600)

        # Save the HTML recording in the videos folder
        folder, extension = tuple(agent_path.rsplit('/', 1))
        videos_folder = os.path.join(folder, 'Videos')
        Path(videos_folder).mkdir(parents=True, exist_ok=True)
        ext = extension[:-5] if extension_override is None else extension_override
        ext += deterministic_extension if deterministic_games else ''
        video_path = os.path.join(videos_folder,
                                  ext + ' - ' + video_type + '.html')
        with open(video_path, "w") as f:
            f.write(game_recording)
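A small standalone sketch (not from the original code) of the late-binding pitfall that the verbose config_id_agents list above works around:

# Lambdas built in a comprehension all close over the same loop variable and
# therefore see its final value when they are called later.
late_bound = [lambda x: x + i for i in range(4)]
print([f(0) for f in late_bound])   # [3, 3, 3, 3]

# Binding the loop variable as a default argument freezes it per lambda.
early_bound = [lambda x, i=i: x + i for i in range(4)]
print([f(0) for f in early_bound])  # [0, 1, 2, 3]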
Example #4
  for i in range(num_agents):
    agent_path = agent_full_paths[i]
    agent_paths = [agent_path, agent_path, agent_path, agent_path]
    video_name = agent_extensions[i][:-3] + " ***self play***"
    agents_paths_video_names.append((agent_paths, video_name))

if agents_paths_video_names:
  # Load all agent callables once (not really that much more performant)
  agent_callables = {}
  for i in range(num_agents): 
    agent_path = agent_full_paths[i]
    agent_file = environment_utils.read_file(agent_path)
    agent_callables[agent_path] = environment_utils.get_last_callable(
      agent_file)
    
  env = make_environment("halite", configuration={"agentExec": "LOCAL"})
  for agent_paths, video_name in agents_paths_video_names:
    agents = []
    for p in agent_paths:
      agents.append(agent_callables[p])
      
    env.reset(num_agents=len(agents))
    env.run(agents)
    
    # Save the HTML recording in the videos folder
    game_recording = env.render(mode="html", width=800, height=600)
    videos_folder = os.path.join(agents_folder, '../Videos')
    Path(videos_folder).mkdir(parents=True, exist_ok=True)
    video_path = os.path.join(videos_folder, video_name+'.html')
    with open(video_path,"w") as f:
      f.write(game_recording)
Example #5
def collect_experience_single_game(game_agent_paths, game_agents, num_agents,
                                   verbose, game_id, env_random_seed,
                                   act_random_seeds, record_game,
                                   episode_steps_override,
                                   early_episode_termination, rule_actions_id):
    episode_start_time = time.time()

    # Generate reproducible data for better debugging
    utils.set_seed(env_random_seed)

    game_agents = [
        a if isinstance(a, dict) else (kaggle_agent.get_last_callable(a))
        for a in game_agents
    ]
    config_game_agents = [
        a if isinstance(a, dict) else "text_agent" for a in (game_agents)
    ]

    # Add an option to shuffle the location of the main agent - for now this is
    # used to test the stateful history logic.
    first_rule_agent = game_agents.pop(0)
    game_agents.insert(rule_actions_id, first_rule_agent)

    env_config = {"randomSeed": env_random_seed}
    if episode_steps_override is not None:
        env_config["episodeSteps"] = episode_steps_override
    env = make_environment('halite', configuration=env_config)
    env.reset(num_agents=num_agents)
    max_episode_steps = env.configuration.episodeSteps
    if early_episode_termination is not None:
        max_episode_steps = min(max_episode_steps, early_episode_termination)
    halite_scores = np.full((max_episode_steps, num_agents), np.nan)
    action_delays = np.full((max_episode_steps - 1, num_agents), np.nan)
    first_get_actions_durations = np.full(max_episode_steps - 1, np.nan)
    first_box_in_durations = np.full(max_episode_steps - 1, np.nan)
    first_history_durations = np.full(max_episode_steps - 1, np.nan)
    first_ship_scores_durations = np.full(max_episode_steps - 1, np.nan)
    first_ship_plans_durations = np.full(max_episode_steps - 1, np.nan)
    first_ship_map_durations = np.full(max_episode_steps - 1, np.nan)
    halite_scores[0] = env.state[0].observation.players[0][0]
    total_halite_spent = np.zeros(num_agents).tolist()

    initial_obs = utils.structured_env_obs(env.configuration,
                                           env.state[0].observation, 0)
    initial_halite_setup = initial_obs['halite']
    initial_agents_setup = np.zeros_like(initial_halite_setup)
    for i, (_, _, ships, _) in enumerate(initial_obs['rewards_bases_ships']):
        initial_agents_setup = initial_agents_setup + (i + 1) * ships

    # Take actions until the game is terminated
    episode_step = 0
    num_lost_ships = np.zeros((max_episode_steps - 1, num_agents),
                              dtype=int)
    first_agent_step_details = []
    first_agent_ship_counts = np.zeros(max_episode_steps - 1)
    ship_counts = np.full((max_episode_steps - 1, num_agents), np.nan)
    histories = [{} for i in range(num_agents)]
    while not env.done:
        env_observation = env.state[0].observation
        player_mapped_actions = []
        for active_id in range(num_agents):
            agent_status = env.state[active_id].status
            players = env.state[0].observation.players
            if agent_status == 'ACTIVE':
                current_observation = utils.structured_env_obs(
                    env.configuration, env_observation, active_id)
                player_obs = players[active_id]
                env_observation.player = active_id
                step_start_time = time.time()
                mapped_actions, updated_history, halite_spent, step_details = (
                    rule_utils.get_config_or_callable_actions(
                        game_agents[active_id], current_observation,
                        player_obs, env_observation, env.configuration,
                        histories[active_id], act_random_seeds[active_id]))
                histories[active_id] = updated_history
                ship_counts[current_observation['step'],
                            active_id] = len(player_obs[2])
                if active_id == rule_actions_id:
                    first_agent_step_details.append(step_details)
                    first_get_actions_durations[episode_step] = step_details[
                        'get_actions_duration']
                    first_box_in_durations[episode_step] = step_details[
                        'box_in_duration']
                    first_history_durations[episode_step] = step_details[
                        'history_start_duration']
                    first_ship_scores_durations[episode_step] = step_details[
                        'ship_scores_duration']
                    first_ship_plans_durations[episode_step] = step_details[
                        'ship_plans_duration']
                    first_ship_map_durations[episode_step] = step_details[
                        'ship_map_duration']
                    first_agent_ship_counts[current_observation['step']] = len(
                        player_obs[2])
                step_delay = time.time() - step_start_time
                action_delays[episode_step, active_id] = step_delay
                total_halite_spent[active_id] += halite_spent
                if verbose:
                    print("Player {} obs: {}".format(active_id, player_obs))
                    print("Actions: {}\n".format(mapped_actions))
                player_mapped_actions.append(mapped_actions)
            else:
                player_mapped_actions.append({})

        env.step(player_mapped_actions)

        for i in range(num_agents):
            agent_status = env.state[i].status
            halite_score = -1 if agent_status in [
                'INVALID', 'DONE'
            ] else env.state[0].observation.players[i][0]
            halite_scores[episode_step + 1, i] = halite_score

        ordered_current_observation = utils.structured_env_obs(
            env.configuration, env_observation, 0)
        num_lost_ships[episode_step] = get_lost_ships_count(
            player_mapped_actions,
            players,
            env.state[0].observation.players,
            ordered_current_observation,
            verbose_id=rule_actions_id + 0.5)

        episode_step += 1
        if early_episode_termination is not None and (
                episode_step >= (early_episode_termination - 1)):
            break

    # Write the terminal halite scores
    halite_scores = update_terminal_halite_scores(num_agents, halite_scores,
                                                  episode_step,
                                                  max_episode_steps, env)

    # Evaluate why the game evolved as it did
    # import pdb; pdb.set_trace()
    action_override_counts = np.array([
        first_agent_step_details[i]['action_overrides']
        for i in range(len(first_agent_step_details))
    ])

    print("Action override counts:", action_override_counts.sum(0))
    print("Num lost ships:", num_lost_ships.sum(0))

    # Obtain the terminal rewards for all agents
    episode_rewards = get_episode_rewards(halite_scores)

    # Obtain the terminal number of ships and bases for all agents
    terminal_num_bases, terminal_num_ships = get_base_and_ship_counts(env)
    terminal_halite = halite_scores[-1].tolist()
    print("Terminal halite:", terminal_halite)

    # Generate the episode recording if requested
    if record_game:
        game_recording = env.render(mode="html", width=800, height=600)
    else:
        game_recording = None

    # Combine the different first player durations into a matrix for better
    # analysis
    all_first_durations = np.stack([
        action_delays[:, rule_actions_id],
        first_get_actions_durations,
        first_box_in_durations,
        first_history_durations,
        first_ship_scores_durations,
        first_ship_plans_durations,
        first_ship_map_durations,
    ], -1)

    # Store the game data
    this_game_data = ExperienceGame(
        game_id,
        config_game_agents,
        game_agent_paths,
        initial_halite_setup,
        initial_agents_setup,
        halite_scores,
        all_first_durations,
        action_delays,
        first_get_actions_durations,
        first_box_in_durations,
        first_history_durations,
        first_ship_scores_durations,
        first_ship_plans_durations,
        first_ship_map_durations,
        episode_step,
        episode_rewards,
        terminal_num_bases,
        terminal_num_ships,
        terminal_halite,
        total_halite_spent,
        None,  # Opponent names added outside of this function
        env_random_seed,
        act_random_seeds,
        # first_agent_step_details,
        game_recording,
        num_lost_ships,
    )

    episode_duration = time.time() - episode_start_time

    return (this_game_data, episode_duration)
Example #6
def record_videos(agent_path,
                  num_agents_per_game,
                  num_mirror_dim,
                  extension_override=None):
    print("Generating videos of iteration {}".format(agent_path))
    model = load_models([agent_path])[0]
    action_costs = get_action_costs()

    def my_agent(observation, env_configuration):
        active_id = observation.player
        current_observation = structured_env_obs(env_configuration,
                                                 observation, active_id)
        player_obs = observation.players[active_id]

        # Preprocess the state so it can be fed in to the network
        obs_input = np.expand_dims(
            state_to_input(current_observation, num_mirror_dim=num_mirror_dim),
            0)

        # Obtain the q values
        q_values = model(obs_input).numpy()[0]

        # Determine valid actions for each of the ships/shipyards
        all_key_q_valid = get_key_q_valid(
            q_values, player_obs, env_configuration,
            current_observation['rewards_bases_ships'])

        mapped_actions = {}
        action_budget = player_obs[0]

        for i, (k, q_sub_values, valid_sub_actions, r, c,
                _) in enumerate(all_key_q_valid):
            # Set actions we can't afford to invalid
            valid_sub_actions &= action_costs <= action_budget
            valid_sub_actions = np.where(valid_sub_actions)[0]
            best_q = q_sub_values[valid_sub_actions].max()
            best_a_id = np.where(
                q_sub_values[valid_sub_actions] == best_q)[0][0]
            action_id = valid_sub_actions[best_a_id]

            # Hard coded epsilon greedy exploration
            if np.random.uniform() < 0.05:
                action_id = np.random.choice(valid_sub_actions)

            action_budget -= action_costs[action_id]
            mapped_action = ACTION_MAPPING[action_id]
            if mapped_action == GO_NEAREST_BASE:
                mapped_action = get_direction_nearest_base(
                    player_obs, r, c, env_configuration.size)
            if not mapped_action in [SHIP_NONE, BASE_NONE]:
                mapped_actions[k] = mapped_action

        return mapped_actions

    env = make_environment("halite", configuration={
        "agentExec": "LOCAL"
    })  #, configuration={"agentTimeout": 10000, "actTimeout": 10000})
    for video_type in ["random opponent", "self play"]:
        env.reset(num_agents=num_agents_per_game)
        if video_type == "self play":
            agents = [my_agent] * num_agents_per_game
        else:
            agents = [my_agent] + ["random"] * (num_agents_per_game - 1)
        env.run(agents)

        # Save the HTML recording in the videos folder
        game_recording = env.render(mode="html", width=800, height=600)
        folder, extension = tuple(agent_path.rsplit('/', 1))
        videos_folder = os.path.join(folder, 'Videos')
        Path(videos_folder).mkdir(parents=True, exist_ok=True)
        ext = extension[:-3] if extension_override is None else extension_override
        video_path = os.path.join(videos_folder,
                                  ext + ' - ' + video_type + '.html')
        with open(video_path, "w") as f:
            f.write(game_recording)
Example #7
def collect_experience_single_game(this_agent, other_agents, num_agents,
                                   agent_config, action_costs, verbose,
                                   game_id):
    episode_start_time = time.time()
    game_agents, this_agent_position, opponent_id = get_game_agents(
        this_agent, other_agents, num_agents)

    this_game_data = []
    env = make_environment('halite')
    env.reset(num_agents=num_agents)
    exploration_parameter, max_exploration_parameter = (
        get_exploration_parameter(agent_config))
    max_episode_steps = env.configuration.episodeSteps
    halite_scores = np.full((max_episode_steps, num_agents), np.nan)
    halite_scores[0] = env.state[0].observation.players[0][0]
    episode_step = 0

    # Take actions until the game is terminated
    while not env.done:
        env_observation = env.state[0].observation
        player_current_observations = []
        player_current_obs = []
        player_env_obs = []
        player_network_outputs = []
        player_actions = []
        player_mapped_actions = []
        player_valid_actions = []
        store_transition_ids = []
        for active_id in range(num_agents):
            agent_status = env.state[active_id].status
            if agent_status == 'ACTIVE':
                store_transition_ids.append(active_id)
                current_observation = utils.structured_env_obs(
                    env.configuration, env_observation, active_id)
                player_obs = env.state[0].observation.players[active_id]
                (current_obs, network_outputs, actions, mapped_actions,
                 valid_actions) = utils.get_agent_q_and_a(
                     game_agents[active_id],
                     current_observation,
                     player_obs,
                     env.configuration,
                     agent_config['epsilon_greedy'],
                     exploration_parameter,
                     agent_config['num_mirror_dim'],
                     action_costs,
                     pick_first_on_tie=False)
                if verbose:
                    print("Player {} obs: {}".format(active_id, player_obs))
                    print("Actions: {}\n".format(mapped_actions))
                player_current_observations.append(current_observation)
                player_current_obs.append(current_obs[0][0])
                player_env_obs.append(player_obs)
                player_network_outputs.append(network_outputs)
                player_actions.append(actions)
                player_mapped_actions.append(mapped_actions)
                player_valid_actions.append(valid_actions)

            else:
                if agent_status != 'INVALID':
                    raise ValueError(
                        "Unexpected agent state: {}".format(agent_status))
                player_mapped_actions.append({})

        if verbose:
            print("Step: {}; Max halite: {}".format(
                episode_step, current_observation['halite'].max()))

        env.step(player_mapped_actions)
        env_observation = env.state[0].observation

        # Store the state transition data
        for i, active_id in enumerate(store_transition_ids):
            next_observation = utils.structured_env_obs(
                env.configuration, env_observation, active_id)
            # next_halite = next_observation['rewards_bases_ships'][0][0]
            # next_obs = utils.state_to_input(next_observation)
            agent_status = env.state[active_id].status
            next_halite = env.state[0].observation.players[active_id][0]

            # if next_halite-halite_scores[episode_step, active_id] < -5000:
            #   import pdb; pdb.set_trace()

            # Overwrite selected actions to None if the environment did not execute
            # the requested action.
            player_obs = env.state[0].observation.players[active_id]
            player_actions[i] = set_ignored_actions_to_None(
                player_actions[i], player_mapped_actions[active_id],
                player_env_obs[i], player_obs, player_current_observations[i],
                next_observation)

            this_game_data.append(
                ExperienceStep(
                    game_id,
                    player_current_obs[i],
                    player_actions[i],
                    player_mapped_actions[active_id],
                    player_valid_actions[i],
                    player_network_outputs[i],
                    # next_obs, # Dropped out of memory concerns - useful for debugging
                    active_id == this_agent_position,  # This agent move?
                    active_id,
                    episode_step,
                    next_halite,
                    next_halite - halite_scores[episode_step, active_id],
                    np.nan,  # Number of episode steps, overwritten at end of episode
                    agent_status == 'INVALID',  # Last episode action
                    np.nan,  # Reward, overwritten at the end of the episode
                ))

        for i in range(num_agents):
            agent_status = env.state[i].status
            halite_score = -1 if agent_status == 'INVALID' else env.state[
                0].observation.players[i][0]
            halite_scores[episode_step + 1, i] = halite_score

        episode_step += 1

    # Obtain the terminal rewards for all agents
    halite_scores = halite_scores[:episode_step]
    episode_rewards = get_episode_rewards(halite_scores)

    # Update statistics which can not be computed before the episode is over.
    for i in range(len(store_transition_ids)):
        this_game_data[-1 - i].last_episode_action = True  # Last episode action
    for i in range(len(this_game_data)):
        this_game_data[i].num_episode_steps = episode_step

    episode_duration = time.time() - episode_start_time

    return (this_game_data, episode_rewards, opponent_id, this_agent_position,
            episode_duration)