Example #1
def get_base_and_ship_counts(env):
    terminal_obs = utils.structured_env_obs(env.configuration,
                                            env.state[0].observation, 0)
    terminal_base_counts = []
    terminal_ship_counts = []
    for _, bases, ships, _ in terminal_obs['rewards_bases_ships']:
        terminal_base_counts.append(bases.sum())
        terminal_ship_counts.append(ships.sum())

    return terminal_base_counts, terminal_ship_counts
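
A minimal usage sketch (hypothetical; it assumes kaggle_environments is installed, the repo's utils module with structured_env_obs is importable, and the built-in "random" Halite agents play out a full game):

# Illustrative only: finish a Halite game, then read the terminal counts.
from kaggle_environments import make

env = make("halite", configuration={"randomSeed": 0})
env.run(["random", "random", "random", "random"])
base_counts, ship_counts = get_base_and_ship_counts(env)
print(base_counts, ship_counts)  # one entry per player in each list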
Example #2
    def my_agent(observation, config_id):
        config = AGENT_CONFIGS[config_id]
        rng_action_seed = rng_action_seeds[config_id]
        active_id = observation.player
        current_observation = utils.structured_env_obs(env_configuration,
                                                       observation, active_id)
        player_obs = observation.players[active_id]

        mapped_actions, _, _ = get_config_or_callable_actions(
            config, current_observation, player_obs, observation,
            env_configuration, rng_action_seed)

        return mapped_actions
Example #3
def collect_experience_single_game(game_agent_paths, game_agents, num_agents,
                                   verbose, game_id, env_random_seed,
                                   act_random_seeds, record_game,
                                   episode_steps_override,
                                   early_episode_termination, rule_actions_id):
    episode_start_time = time.time()

    # Generate reproducible data for better debugging
    utils.set_seed(env_random_seed)

    game_agents = [
        a if isinstance(a, dict) else kaggle_agent.get_last_callable(a)
        for a in game_agents
    ]
    config_game_agents = [
        a if isinstance(a, dict) else "text_agent" for a in game_agents
    ]

    # Add option to shuffle the location of the main agent - for now this serves
    # for testing the stateful history logic.
    first_rule_agent = game_agents.pop(0)
    game_agents.insert(rule_actions_id, first_rule_agent)

    env_config = {"randomSeed": env_random_seed}
    if episode_steps_override is not None:
        env_config["episodeSteps"] = episode_steps_override
    env = make_environment('halite', configuration=env_config)
    env.reset(num_agents=num_agents)
    max_episode_steps = env.configuration.episodeSteps
    if early_episode_termination is not None:
        max_episode_steps = min(max_episode_steps, early_episode_termination)
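    # Pre-allocate per-step bookkeeping arrays; NaN marks steps that never
    # happen (e.g. when the episode is terminated early).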
    halite_scores = np.full((max_episode_steps, num_agents), np.nan)
    action_delays = np.full((max_episode_steps - 1, num_agents), np.nan)
    first_get_actions_durations = np.full(max_episode_steps - 1, np.nan)
    first_box_in_durations = np.full(max_episode_steps - 1, np.nan)
    first_history_durations = np.full(max_episode_steps - 1, np.nan)
    first_ship_scores_durations = np.full(max_episode_steps - 1, np.nan)
    first_ship_plans_durations = np.full(max_episode_steps - 1, np.nan)
    first_ship_map_durations = np.full(max_episode_steps - 1, np.nan)
    halite_scores[0] = env.state[0].observation.players[0][0]
    total_halite_spent = np.zeros(num_agents).tolist()

    initial_obs = utils.structured_env_obs(env.configuration,
                                           env.state[0].observation, 0)
    initial_halite_setup = initial_obs['halite']
    initial_agents_setup = np.zeros_like(initial_halite_setup)
    for i, (_, _, ships, _) in enumerate(initial_obs['rewards_bases_ships']):
        initial_agents_setup = initial_agents_setup + (i + 1) * ships

    # Take actions until the game is terminated
    episode_step = 0
    num_lost_ships = np.zeros((max_episode_steps - 1, num_agents),
                              dtype=int)  # np.int alias was removed in NumPy 1.24
    first_agent_step_details = []
    first_agent_ship_counts = np.zeros(max_episode_steps - 1)
    ship_counts = np.full((max_episode_steps - 1, num_agents), np.nan)
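    # One mutable history dict per agent, threaded through the rule-based
    # action computation on every step.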
    histories = [{} for _ in range(num_agents)]
    while not env.done:
        env_observation = env.state[0].observation
        player_mapped_actions = []
        for active_id in range(num_agents):
            agent_status = env.state[active_id].status
            players = env.state[0].observation.players
            if agent_status == 'ACTIVE':
                current_observation = utils.structured_env_obs(
                    env.configuration, env_observation, active_id)
                player_obs = players[active_id]
                env_observation.player = active_id
                step_start_time = time.time()
                mapped_actions, updated_history, halite_spent, step_details = (
                    rule_utils.get_config_or_callable_actions(
                        game_agents[active_id], current_observation,
                        player_obs, env_observation, env.configuration,
                        histories[active_id], act_random_seeds[active_id]))
                histories[active_id] = updated_history
                ship_counts[current_observation['step'],
                            active_id] = len(player_obs[2])
                if active_id == rule_actions_id:
                    first_agent_step_details.append(step_details)
                    first_get_actions_durations[episode_step] = step_details[
                        'get_actions_duration']
                    first_box_in_durations[episode_step] = step_details[
                        'box_in_duration']
                    first_history_durations[episode_step] = step_details[
                        'history_start_duration']
                    first_ship_scores_durations[episode_step] = step_details[
                        'ship_scores_duration']
                    first_ship_plans_durations[episode_step] = step_details[
                        'ship_plans_duration']
                    first_ship_map_durations[episode_step] = step_details[
                        'ship_map_duration']
                    first_agent_ship_counts[current_observation['step']] = len(
                        player_obs[2])
                step_delay = time.time() - step_start_time
                action_delays[episode_step, active_id] = step_delay
                total_halite_spent[active_id] += halite_spent
                if verbose:
                    print("Player {} obs: {}".format(active_id, player_obs))
                    print("Actions: {}\n".format(mapped_actions))
                player_mapped_actions.append(mapped_actions)
            else:
                player_mapped_actions.append({})

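        # Advance the environment with one action dict per player (inactive
        # players submit an empty dict).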
        env.step(player_mapped_actions)

        for i in range(num_agents):
            agent_status = env.state[i].status
            halite_score = -1 if agent_status in [
                'INVALID', 'DONE'
            ] else env.state[0].observation.players[i][0]
            halite_scores[episode_step + 1, i] = halite_score

        ordered_current_observation = utils.structured_env_obs(
            env.configuration, env_observation, 0)
        num_lost_ships[episode_step] = get_lost_ships_count(
            player_mapped_actions,
            players,
            env.state[0].observation.players,
            ordered_current_observation,
            verbose_id=rule_actions_id + 0.5)

        episode_step += 1
        if early_episode_termination is not None and (
                episode_step >= (early_episode_termination - 1)):
            break

    # Write the terminal halite scores
    halite_scores = update_terminal_halite_scores(num_agents, halite_scores,
                                                  episode_step,
                                                  max_episode_steps, env)

    # Evaluate why the game evolved as it did
    # import pdb; pdb.set_trace()
    action_override_counts = np.array([
        first_agent_step_details[i]['action_overrides']
        for i in range(len(first_agent_step_details))
    ])

    print("Action override counts:", action_override_counts.sum(0))
    print("Num lost ships:", num_lost_ships.sum(0))

    # Obtain the terminal rewards for all agents
    episode_rewards = get_episode_rewards(halite_scores)

    # Obtain the terminal number of ships and bases for all agents
    terminal_num_bases, terminal_num_ships = get_base_and_ship_counts(env)
    terminal_halite = halite_scores[-1].tolist()
    print("Terminal halite:", terminal_halite)

    # Generate the episode recording if requested
    if record_game:
        game_recording = env.render(mode="html", width=800, height=600)
    else:
        game_recording = None

    # Combine the different first player durations into a matrix for better
    # analysis
    all_first_durations = np.stack([
        action_delays[:, rule_actions_id],
        first_get_actions_durations,
        first_box_in_durations,
        first_history_durations,
        first_ship_scores_durations,
        first_ship_plans_durations,
        first_ship_map_durations,
    ], -1)

    # Store the game data
    this_game_data = ExperienceGame(
        game_id,
        config_game_agents,
        game_agent_paths,
        initial_halite_setup,
        initial_agents_setup,
        halite_scores,
        all_first_durations,
        action_delays,
        first_get_actions_durations,
        first_box_in_durations,
        first_history_durations,
        first_ship_scores_durations,
        first_ship_plans_durations,
        first_ship_map_durations,
        episode_step,
        episode_rewards,
        terminal_num_bases,
        terminal_num_ships,
        terminal_halite,
        total_halite_spent,
        None,  # Opponent names added outside of this function
        env_random_seed,
        act_random_seeds,
        # first_agent_step_details,
        game_recording,
        num_lost_ships,
    )

    episode_duration = time.time() - episode_start_time

    return (this_game_data, episode_duration)
Example #4
def get_game_ship_base_loss_count(replay, player_id, game_agent,
                                  process_each_step):
    num_steps = len(replay['steps'])
    prev_units_obs = replay['steps'][0][0]['observation']['players'][player_id]
    destroyed_conversions = 0
    boxed_ship_loss = 0
    shipyard_collision_losses = 0
    ship_loss = 0
    base_loss = 0
    ship_non_boxed_loss_counterfactual = 0
    all_counterfactual_ship_loss = 0
    prev_obs = None
    prev_env_observation = None
    env_configuration = utils.dotdict(replay['configuration'])
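    # Walk consecutive replay steps; a ship id present in one step but missing
    # from the next was either converted or lost.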
    for i in range(num_steps - 1):
        current_units_obs = replay['steps'][
            i + 1][0]['observation']['players'][player_id]
        env_observation = utils.dotdict(replay['steps'][i +
                                                        1][0]['observation'])
        env_observation['step'] = i + 1
        env_observation.player = player_id
        obs = utils.structured_env_obs(env_configuration, env_observation,
                                       player_id)

        prev_actions = replay['steps'][i + 1][player_id]['action']
        for k in prev_units_obs[2]:
            if k not in current_units_obs[2]:
                # pos_actions_ships = get_pos_actions_ships(prev_units_obs[2], prev_actions)
                prev_pos = prev_units_obs[2][k][0]
                if prev_pos not in current_units_obs[1].values():
                    ship_action = get_ship_action(k, prev_actions)
                    if ship_action == "CONVERT":
                        destroyed_conversions += 1
                    else:
                        boxed_ship_loss += int(
                            is_boxed_ship_loss(prev_pos, prev_obs))
                        ship_loss += 1
                        if not boxed_ship_loss:
                            if ship_action is not None and base_at_collision_pos(
                                    prev_pos, ship_action, replay['steps'][i],
                                    replay['steps'][i + 1]):
                                shipyard_collision_losses += 1
                            else:
                                mapped_actions, _, step_details = (
                                    rule_utils.get_config_or_callable_actions(
                                        game_agent, prev_obs, prev_units_obs,
                                        prev_env_observation,
                                        env_configuration))
                                # if prev_obs['step'] == 281:
                                #   import pdb; pdb.set_trace()

                                ship_non_boxed_loss_counterfactual += (
                                    ship_loss_count_counterfact(
                                        mapped_actions, prev_units_obs, obs))

        if process_each_step and prev_obs is not None:
            mapped_actions, _, step_details = (
                rule_utils.get_config_or_callable_actions(
                    game_agent, prev_obs, prev_units_obs, prev_env_observation,
                    env_configuration))
            # if prev_obs['step'] == 204:
            #   print(mapped_actions)
            #   import pdb; pdb.set_trace()

            all_counterfactual_ship_loss += (ship_loss_count_counterfact(
                mapped_actions, prev_units_obs, obs))

        for k in prev_units_obs[1]:
            if k not in current_units_obs[1]:
                base_loss += 1

        prev_env_observation = env_observation
        prev_units_obs = current_units_obs
        prev_obs = obs

    return (destroyed_conversions, boxed_ship_loss, shipyard_collision_losses,
            ship_loss, base_loss, ship_non_boxed_loss_counterfactual,
            all_counterfactual_ship_loss)
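
A sketch of a possible driver for this replay analysis (hypothetical; "replay.json" and my_rule_agent are placeholders, and the replay is assumed to be the Kaggle JSON export containing 'steps' and 'configuration'):

import json

# Illustrative only: count player 0's ship and base losses in a saved replay.
with open("replay.json") as f:  # placeholder path
    replay = json.load(f)

(destroyed_conversions, boxed_ship_loss, shipyard_collision_losses, ship_loss,
 base_loss, non_boxed_counterfactual, all_counterfactual) = (
     get_game_ship_base_loss_count(
         replay, player_id=0, game_agent=my_rule_agent,  # assumed agent config
         process_each_step=False))
print("Ships lost:", ship_loss, "Bases lost:", base_loss)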
Example #5
def collect_experience_single_game(this_agent, other_agents, num_agents,
                                   agent_config, action_costs, verbose,
                                   game_id):
    episode_start_time = time.time()
    game_agents, this_agent_position, opponent_id = get_game_agents(
        this_agent, other_agents, num_agents)

    this_game_data = []
    env = make_environment('halite')
    env.reset(num_agents=num_agents)
    exploration_parameter, max_exploration_parameter = (
        get_exploration_parameter(agent_config))
    max_episode_steps = env.configuration.episodeSteps
    halite_scores = np.full((max_episode_steps, num_agents), np.nan)
    halite_scores[0] = env.state[0].observation.players[0][0]
    episode_step = 0

    # Take actions until the game is terminated
    while not env.done:
        env_observation = env.state[0].observation
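        # Per-step buffers; most hold one entry per still-active agent
        # (player_mapped_actions gets an empty dict for eliminated agents).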
        player_current_observations = []
        player_current_obs = []
        player_env_obs = []
        player_network_outputs = []
        player_actions = []
        player_mapped_actions = []
        player_valid_actions = []
        store_transition_ids = []
        for active_id in range(num_agents):
            agent_status = env.state[active_id].status
            if agent_status == 'ACTIVE':
                store_transition_ids.append(active_id)
                current_observation = utils.structured_env_obs(
                    env.configuration, env_observation, active_id)
                player_obs = env.state[0].observation.players[active_id]
                (current_obs, network_outputs, actions, mapped_actions,
                 valid_actions) = utils.get_agent_q_and_a(
                     game_agents[active_id],
                     current_observation,
                     player_obs,
                     env.configuration,
                     agent_config['epsilon_greedy'],
                     exploration_parameter,
                     agent_config['num_mirror_dim'],
                     action_costs,
                     pick_first_on_tie=False)
                if verbose:
                    print("Player {} obs: {}".format(active_id, player_obs))
                    print("Actions: {}\n".format(mapped_actions))
                player_current_observations.append(current_observation)
                player_current_obs.append(current_obs[0][0])
                player_env_obs.append(player_obs)
                player_network_outputs.append(network_outputs)
                player_actions.append(actions)
                player_mapped_actions.append(mapped_actions)
                player_valid_actions.append(valid_actions)

            else:
                if agent_status != 'INVALID':
                    raise ValueError(
                        "Unexpected agent state: {}".format(agent_status))
                player_mapped_actions.append({})

        if verbose:
            print("Step: {}; Max halite: {}".format(
                episode_step, current_observation['halite'].max()))

        env.step(player_mapped_actions)
        env_observation = env.state[0].observation

        # Store the state transition data
        for i, active_id in enumerate(store_transition_ids):
            next_observation = utils.structured_env_obs(
                env.configuration, env_observation, active_id)
            # next_halite = next_observation['rewards_bases_ships'][0][0]
            # next_obs = utils.state_to_input(next_observation)
            agent_status = env.state[active_id].status
            next_halite = env.state[0].observation.players[active_id][0]

            # if next_halite-halite_scores[episode_step, active_id] < -5000:
            #   import pdb; pdb.set_trace()

            # Overwrite selected actions to None if the environment did not execute
            # the requested action.
            player_obs = env.state[0].observation.players[active_id]
            player_actions[i] = set_ignored_actions_to_None(
                player_actions[i], player_mapped_actions[active_id],
                player_env_obs[i], player_obs, player_current_observations[i],
                next_observation)

            this_game_data.append(
                ExperienceStep(
                    game_id,
                    player_current_obs[i],
                    player_actions[i],
                    player_mapped_actions[active_id],
                    player_valid_actions[i],
                    player_network_outputs[i],
                    # next_obs, # Dropped out of memory concerns - useful for debugging
                    active_id == this_agent_position,  # This agent move?
                    active_id,
                    episode_step,
                    next_halite,
                    next_halite - halite_scores[episode_step, active_id],
                    np.nan,  # Number of episode steps, overwritten at episode end
                    agent_status == 'INVALID',  # Last episode action
                    np.nan,  # Reward, overwritten at the end of the episode
                ))

        for i in range(num_agents):
            agent_status = env.state[i].status
            halite_score = -1 if agent_status == 'INVALID' else (
                env.state[0].observation.players[i][0])
            halite_scores[episode_step + 1, i] = halite_score

        episode_step += 1

    # Obtain the terminal rewards for all agents
    halite_scores = halite_scores[:episode_step]
    episode_rewards = get_episode_rewards(halite_scores)

    # Update statistics which can not be computed before the episode is over.
    for i in range(len(store_transition_ids)):
        this_game_data[-1 - i].last_episode_action = True  # Last episode action
    for i in range(len(this_game_data)):
        this_game_data[i].num_episode_steps = episode_step

    episode_duration = time.time() - episode_start_time

    return (this_game_data, episode_rewards, opponent_id, this_agent_position,
            episode_duration)
Example #6
def get_game_ship_base_loss_count(replay, player_id, game_agent,
                                  process_each_step):
  num_steps = len(replay['steps'])
  prev_units_obs = replay['steps'][0][0]['observation']['players'][player_id]
  destroyed_conversions = 0
  boxed_ship_loss = 0
  shipyard_collision_losses = 0
  ship_loss = 0
  base_loss = 0
  ship_non_boxed_loss_counterfactual = 0
  all_counterfactual_ship_loss = 0
  prev_obs = None
  prev_env_observation = None
  env_configuration = utils.dotdict(replay['configuration'])
  history = {}
  prev_history = -1
  step_times = []
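  # One row per step (up to the default 400-step Halite episode); one column
  # per timing component extracted from step_details below.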
  my_step_durations = np.zeros((400, 8))
  for i in range(num_steps-1):
    print(i)
    current_units_obs = replay['steps'][i][0]['observation']['players'][
      player_id]
    env_observation = utils.dotdict(replay['steps'][i][0]['observation'])
    env_observation['step'] = i
    env_observation.player = player_id
    obs = utils.structured_env_obs(env_configuration, env_observation,
                                   player_id)
    prev_actions = replay['steps'][i][player_id]['action']
    actions = replay['steps'][i+1][player_id]['action']
    
    # if i == 274:
    #   import pdb; pdb.set_trace()
    
    for k in prev_units_obs[2]:
      if k not in current_units_obs[2]:
        # pos_actions_ships = get_pos_actions_ships(prev_units_obs[2], prev_actions)
        prev_pos = prev_units_obs[2][k][0]
        if prev_pos not in current_units_obs[1].values():
          prev_ship_action = get_ship_action(k, prev_actions)
          if prev_ship_action == "CONVERT":
            destroyed_conversions += 1 
          else:
            boxed_ship_loss += int(is_boxed_ship_loss(prev_pos, prev_obs))
            # import pdb; pdb.set_trace()
            ship_loss += 1
            if not boxed_ship_loss:
              if prev_ship_action is not None and base_at_collision_pos(
                  prev_pos, prev_ship_action, replay['steps'][i],
                  replay['steps'][i+1]):
                # import pdb; pdb.set_trace()
                print(history['prev_step']['observation']['step'],
                      prev_history['prev_step']['observation']['step'])
                shipyard_collision_losses += 1
              else:
                mapped_actions, _, _, _ = (
                  rule_utils.get_config_or_callable_actions(
                    game_agent, prev_obs, prev_units_obs,
                    prev_env_observation, env_configuration, copy.deepcopy(
                      prev_history)))
                # if prev_obs['step'] == 281:
                #   import pdb; pdb.set_trace()
                
                # mapped_actions['6-2'] = 'WEST'
                # import pdb; pdb.set_trace()
                ship_non_boxed_loss_counterfactual += (
                  ship_loss_count_counterfact(mapped_actions, prev_units_obs,
                                              obs, debug=False))
    
    if process_each_step:
      if prev_obs is not None:
        # import pdb; pdb.set_trace()
        mapped_actions, _, _, _ = (
          rule_utils.get_config_or_callable_actions(
            game_agent, prev_obs, prev_units_obs, prev_env_observation,
            env_configuration, copy.deepcopy(prev_history)))
        # if prev_obs['step'] == 204:
        #   print(mapped_actions)
        #   import pdb; pdb.set_trace()
        
        all_counterfactual_ship_loss += (
          ship_loss_count_counterfact(mapped_actions, prev_units_obs, obs))
        
      # import pdb; pdb.set_trace()
      prev_history = copy.deepcopy(history)
      start_time = time.time()
      current_actions, history, _, step_details = (
        rule_utils.get_config_or_callable_actions(
          game_agent, obs, current_units_obs, env_observation,
          env_configuration, history))
      if step_details is not None:
        step_time = time.time()-start_time
        step_times.append(step_time)
        my_step_durations[i] = np.array([
          step_details['get_actions_duration'],
          step_details['ship_scores_duration'],
          step_details['ship_plans_duration'],
          step_details['ship_map_duration'],
          step_details['inner_loop_ship_plans_duration'],
          step_details['recompute_ship_plan_order_duration'],
          step_details['history_start_duration'],
          step_details['box_in_duration'],
          ])
      
      # Overwrite the prev actions in history
      try:
        none_included_ship_actions = {
          k: (actions[k] if k in actions else None)
          for k in current_units_obs[2]}
      except Exception:
        # This happens when my submission times out
        import pdb; pdb.set_trace()
        x = 1
      history['prev_step']['my_ship_actions'] = none_included_ship_actions
      
      # print(current_actions, actions)
      # for k in current_actions:
      #   if current_actions[k] != actions[k]:
      #     import pdb; pdb.set_trace()
      #     x=1
    
    for k in prev_units_obs[1]:
      if k not in current_units_obs[1]:
        base_loss += 1
        
    prev_env_observation = env_observation
    prev_units_obs = current_units_obs
    prev_obs = obs
    
    # if i > 1:
    #   print(history['prev_step']['observation']['step'],
    #                     prev_history['prev_step']['observation']['step'])
      
  return ((destroyed_conversions, boxed_ship_loss, shipyard_collision_losses,
           ship_loss, base_loss, ship_non_boxed_loss_counterfactual,
           all_counterfactual_ship_loss), np.array(step_times),
          my_step_durations)