def get_base_and_ship_counts(env):
  terminal_obs = utils.structured_env_obs(
    env.configuration, env.state[0].observation, 0)
  terminal_base_counts = []
  terminal_ship_counts = []
  for _, bases, ships, _ in terminal_obs['rewards_bases_ships']:
    terminal_base_counts.append(bases.sum())
    terminal_ship_counts.append(ships.sum())

  return terminal_base_counts, terminal_ship_counts
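# Usage sketch (illustrative, not part of the original module): play a quick
# game with the built-in "random" halite agents from kaggle_environments and
# tally the terminal base/ship counts per player. Assumes `make_environment`
# aliases `kaggle_environments.make`, as in `collect_experience_single_game`
# further below.
def _example_terminal_counts():
  env = make_environment('halite')
  env.run(["random"] * 4)  # run a full four-player game to completion
  terminal_bases, terminal_ships = get_base_and_ship_counts(env)
  print("Terminal bases per player:", terminal_bases)
  print("Terminal ships per player:", terminal_ships)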
def my_agent(observation, config_id):
  config = AGENT_CONFIGS[config_id]
  rng_action_seed = rng_action_seeds[config_id]
  active_id = observation.player
  current_observation = utils.structured_env_obs(
    env_configuration, observation, active_id)
  player_obs = observation.players[active_id]

  mapped_actions, _, _ = get_config_or_callable_actions(
    config, current_observation, player_obs, observation, env_configuration,
    rng_action_seed)

  return mapped_actions
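# Illustrative wrapper (not in the original code): `my_agent` takes a config
# index as its second argument, while kaggle_environments calls agents with
# (observation, configuration). A small closure can bind a fixed config id so
# a single entry of AGENT_CONFIGS can be passed to env.run directly; the
# module-level env_configuration is assumed to be set before the game starts.
def make_fixed_config_agent(config_id):
  def agent(observation, configuration):
    # The passed configuration is ignored here; my_agent reads the
    # module-level env_configuration instead.
    return my_agent(observation, config_id)
  return agent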
def collect_experience_single_game(
    game_agent_paths, game_agents, num_agents, verbose, game_id,
    env_random_seed, act_random_seeds, record_game, episode_steps_override,
    early_episode_termination, rule_actions_id):
  episode_start_time = time.time()

  # Generate reproducible data for better debugging
  utils.set_seed(env_random_seed)

  # Agents are either config dicts (rule-based) or paths to submission files
  # that are loaded as callables.
  game_agents = [
    a if isinstance(a, dict) else kaggle_agent.get_last_callable(a)
    for a in game_agents]
  config_game_agents = [
    a if isinstance(a, dict) else "text_agent" for a in game_agents]

  # Add option to shuffle the location of the main agent - for now this serves
  # for testing the stateful history logic.
  first_rule_agent = game_agents.pop(0)
  game_agents.insert(rule_actions_id, first_rule_agent)

  env_config = {"randomSeed": env_random_seed}
  if episode_steps_override is not None:
    env_config["episodeSteps"] = episode_steps_override
  env = make_environment('halite', configuration=env_config)
  env.reset(num_agents=num_agents)
  max_episode_steps = env.configuration.episodeSteps
  if early_episode_termination is not None:
    max_episode_steps = min(max_episode_steps, early_episode_termination)

  halite_scores = np.full((max_episode_steps, num_agents), np.nan)
  action_delays = np.full((max_episode_steps - 1, num_agents), np.nan)
  first_get_actions_durations = np.full(max_episode_steps - 1, np.nan)
  first_box_in_durations = np.full(max_episode_steps - 1, np.nan)
  first_history_durations = np.full(max_episode_steps - 1, np.nan)
  first_ship_scores_durations = np.full(max_episode_steps - 1, np.nan)
  first_ship_plans_durations = np.full(max_episode_steps - 1, np.nan)
  first_ship_map_durations = np.full(max_episode_steps - 1, np.nan)
  halite_scores[0] = env.state[0].observation.players[0][0]
  total_halite_spent = np.zeros(num_agents).tolist()

  initial_obs = utils.structured_env_obs(
    env.configuration, env.state[0].observation, 0)
  initial_halite_setup = initial_obs['halite']
  initial_agents_setup = np.zeros_like(initial_halite_setup)
  for i, (_, _, ships, _) in enumerate(initial_obs['rewards_bases_ships']):
    initial_agents_setup = initial_agents_setup + (i + 1) * ships

  # Take actions until the game is terminated
  episode_step = 0
  num_lost_ships = np.zeros((max_episode_steps - 1, num_agents), dtype=int)
  first_agent_step_details = []
  first_agent_ship_counts = np.zeros(max_episode_steps - 1)
  ship_counts = np.full((max_episode_steps - 1, num_agents), np.nan)
  histories = [{} for _ in range(num_agents)]
  while not env.done:
    env_observation = env.state[0].observation
    player_mapped_actions = []
    for active_id in range(num_agents):
      agent_status = env.state[active_id].status
      players = env.state[0].observation.players
      if agent_status == 'ACTIVE':
        current_observation = utils.structured_env_obs(
          env.configuration, env_observation, active_id)
        player_obs = players[active_id]
        env_observation.player = active_id
        step_start_time = time.time()
        mapped_actions, updated_history, halite_spent, step_details = (
          rule_utils.get_config_or_callable_actions(
            game_agents[active_id], current_observation, player_obs,
            env_observation, env.configuration, histories[active_id],
            act_random_seeds[active_id]))
        histories[active_id] = updated_history
        ship_counts[current_observation['step'], active_id] = len(
          player_obs[2])
        if active_id == rule_actions_id:
          first_agent_step_details.append(step_details)
          first_get_actions_durations[episode_step] = step_details[
            'get_actions_duration']
          first_box_in_durations[episode_step] = step_details[
            'box_in_duration']
          first_history_durations[episode_step] = step_details[
            'history_start_duration']
          first_ship_scores_durations[episode_step] = step_details[
            'ship_scores_duration']
          first_ship_plans_durations[episode_step] = step_details[
            'ship_plans_duration']
          first_ship_map_durations[episode_step] = step_details[
            'ship_map_duration']
          first_agent_ship_counts[current_observation['step']] = len(
            player_obs[2])
        step_delay = time.time() - step_start_time
        action_delays[episode_step, active_id] = step_delay
        total_halite_spent[active_id] += halite_spent
        if verbose:
          print("Player {} obs: {}".format(active_id, player_obs))
          print("Actions: {}\n".format(mapped_actions))
        player_mapped_actions.append(mapped_actions)
      else:
        player_mapped_actions.append({})

    env.step(player_mapped_actions)

    for i in range(num_agents):
      agent_status = env.state[i].status
      halite_score = -1 if agent_status in [
        'INVALID', 'DONE'] else env.state[0].observation.players[i][0]
      halite_scores[episode_step + 1, i] = halite_score

    ordered_current_observation = utils.structured_env_obs(
      env.configuration, env_observation, 0)
    num_lost_ships[episode_step] = get_lost_ships_count(
      player_mapped_actions, players, env.state[0].observation.players,
      ordered_current_observation, verbose_id=rule_actions_id + 0.5)

    episode_step += 1
    if early_episode_termination is not None and (
        episode_step >= (early_episode_termination - 1)):
      break

  # Write the terminal halite scores
  halite_scores = update_terminal_halite_scores(
    num_agents, halite_scores, episode_step, max_episode_steps, env)

  # Evaluate why the game evolved as it did
  action_override_counts = np.array([
    first_agent_step_details[i]['action_overrides']
    for i in range(len(first_agent_step_details))])
  print("Action override counts:", action_override_counts.sum(0))
  print("Num lost ships:", num_lost_ships.sum(0))

  # Obtain the terminal rewards for all agents
  episode_rewards = get_episode_rewards(halite_scores)

  # Obtain the terminal number of ships and bases for all agents
  terminal_num_bases, terminal_num_ships = get_base_and_ship_counts(env)
  terminal_halite = halite_scores[-1].tolist()
  print("Terminal halite:", terminal_halite)

  # Generate the episode recording if requested
  if record_game:
    game_recording = env.render(mode="html", width=800, height=600)
  else:
    game_recording = None

  # Combine the different first player durations into a matrix for better
  # analysis
  all_first_durations = np.stack([
    action_delays[:, rule_actions_id],
    first_get_actions_durations,
    first_box_in_durations,
    first_history_durations,
    first_ship_scores_durations,
    first_ship_plans_durations,
    first_ship_map_durations,
  ], -1)

  # Store the game data
  this_game_data = ExperienceGame(
    game_id,
    config_game_agents,
    game_agent_paths,
    initial_halite_setup,
    initial_agents_setup,
    halite_scores,
    all_first_durations,
    action_delays,
    first_get_actions_durations,
    first_box_in_durations,
    first_history_durations,
    first_ship_scores_durations,
    first_ship_plans_durations,
    first_ship_map_durations,
    episode_step,
    episode_rewards,
    terminal_num_bases,
    terminal_num_ships,
    terminal_halite,
    total_halite_spent,
    None,  # Opponent names added outside of this function
    env_random_seed,
    act_random_seeds,
    # first_agent_step_details,
    game_recording,
    num_lost_ships,
  )

  episode_duration = time.time() - episode_start_time

  return (this_game_data, episode_duration)
def get_game_ship_base_loss_count(replay, player_id, game_agent,
                                  process_each_step):
  num_steps = len(replay['steps'])
  prev_units_obs = replay['steps'][0][0]['observation']['players'][player_id]
  destroyed_conversions = 0
  boxed_ship_loss = 0
  shipyard_collision_losses = 0
  ship_loss = 0
  base_loss = 0
  ship_non_boxed_loss_counterfactual = 0
  all_counterfactual_ship_loss = 0
  prev_obs = None
  prev_env_observation = None
  env_configuration = utils.dotdict(replay['configuration'])
  for i in range(num_steps - 1):
    current_units_obs = replay['steps'][
      i + 1][0]['observation']['players'][player_id]
    env_observation = utils.dotdict(replay['steps'][i + 1][0]['observation'])
    env_observation['step'] = i + 1
    env_observation.player = player_id
    obs = utils.structured_env_obs(env_configuration, env_observation,
                                   player_id)
    prev_actions = replay['steps'][i + 1][player_id]['action']

    # Ships that disappeared between consecutive steps: classify how they
    # were lost and, where relevant, replay the counterfactual agent actions.
    for k in prev_units_obs[2]:
      if not k in current_units_obs[2]:
        prev_pos = prev_units_obs[2][k][0]
        if not prev_pos in current_units_obs[1].values():
          ship_action = get_ship_action(k, prev_actions)
          if ship_action == "CONVERT":
            destroyed_conversions += 1
          else:
            boxed_ship_loss += int(is_boxed_ship_loss(prev_pos, prev_obs))
            ship_loss += 1
            if not boxed_ship_loss:
              if ship_action is not None and base_at_collision_pos(
                  prev_pos, ship_action, replay['steps'][i],
                  replay['steps'][i + 1]):
                shipyard_collision_losses += 1
              else:
                mapped_actions, _, step_details = (
                  rule_utils.get_config_or_callable_actions(
                    game_agent, prev_obs, prev_units_obs,
                    prev_env_observation, env_configuration))
                ship_non_boxed_loss_counterfactual += (
                  ship_loss_count_counterfact(
                    mapped_actions, prev_units_obs, obs))

    if process_each_step and prev_obs is not None:
      mapped_actions, _, step_details = (
        rule_utils.get_config_or_callable_actions(
          game_agent, prev_obs, prev_units_obs, prev_env_observation,
          env_configuration))
      all_counterfactual_ship_loss += (ship_loss_count_counterfact(
        mapped_actions, prev_units_obs, obs))

    for k in prev_units_obs[1]:
      if not k in current_units_obs[1]:
        base_loss += 1

    prev_env_observation = env_observation
    prev_units_obs = current_units_obs
    prev_obs = obs

  return (destroyed_conversions, boxed_ship_loss, shipyard_collision_losses,
          ship_loss, base_loss, ship_non_boxed_loss_counterfactual,
          all_counterfactual_ship_loss)
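# Usage sketch (illustrative, not part of the original module): load a
# downloaded episode replay and count ship/base losses for player 0. The
# replay file name is a placeholder, and passing AGENT_CONFIGS[0] as the
# counterfactual agent config is an assumption based on how config dicts are
# used elsewhere in this code.
def _example_replay_loss_counts():
  import json
  with open('replay.json') as f:
    replay = json.load(f)
  loss_counts = get_game_ship_base_loss_count(
    replay, player_id=0, game_agent=AGENT_CONFIGS[0],
    process_each_step=False)
  print("Ship losses:", loss_counts[3], "Base losses:", loss_counts[4])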
def collect_experience_single_game(this_agent, other_agents, num_agents,
                                   agent_config, action_costs, verbose,
                                   game_id):
  episode_start_time = time.time()
  game_agents, this_agent_position, opponent_id = get_game_agents(
    this_agent, other_agents, num_agents)
  this_game_data = []
  env = make_environment('halite')
  env.reset(num_agents=num_agents)
  exploration_parameter, max_exploration_parameter = (
    get_exploration_parameter(agent_config))
  max_episode_steps = env.configuration.episodeSteps
  halite_scores = np.full((max_episode_steps, num_agents), np.nan)
  halite_scores[0] = env.state[0].observation.players[0][0]
  episode_step = 0

  # Take actions until the game is terminated
  while not env.done:
    env_observation = env.state[0].observation
    player_current_observations = []
    player_current_obs = []
    player_env_obs = []
    player_network_outputs = []
    player_actions = []
    player_mapped_actions = []
    player_valid_actions = []
    store_transition_ids = []
    for active_id in range(num_agents):
      agent_status = env.state[active_id].status
      if agent_status == 'ACTIVE':
        store_transition_ids.append(active_id)
        current_observation = utils.structured_env_obs(
          env.configuration, env_observation, active_id)
        player_obs = env.state[0].observation.players[active_id]
        (current_obs, network_outputs, actions, mapped_actions,
         valid_actions) = utils.get_agent_q_and_a(
           game_agents[active_id], current_observation, player_obs,
           env.configuration, agent_config['epsilon_greedy'],
           exploration_parameter, agent_config['num_mirror_dim'],
           action_costs, pick_first_on_tie=False)
        if verbose:
          print("Player {} obs: {}".format(active_id, player_obs))
          print("Actions: {}\n".format(mapped_actions))
        player_current_observations.append(current_observation)
        player_current_obs.append(current_obs[0][0])
        player_env_obs.append(player_obs)
        player_network_outputs.append(network_outputs)
        player_actions.append(actions)
        player_mapped_actions.append(mapped_actions)
        player_valid_actions.append(valid_actions)
      else:
        if agent_status != 'INVALID':
          raise ValueError(
            "Unexpected agent state: {}".format(agent_status))
        player_mapped_actions.append({})

    if verbose:
      print("Step: {}; Max halite: {}".format(
        episode_step, current_observation['halite'].max()))

    env.step(player_mapped_actions)
    env_observation = env.state[0].observation

    # Store the state transition data
    for i, active_id in enumerate(store_transition_ids):
      next_observation = utils.structured_env_obs(
        env.configuration, env_observation, active_id)
      agent_status = env.state[active_id].status
      next_halite = env.state[0].observation.players[active_id][0]

      # Overwrite selected actions to None if the environment did not execute
      # the requested action.
      player_obs = env.state[0].observation.players[active_id]
      player_actions[i] = set_ignored_actions_to_None(
        player_actions[i], player_mapped_actions[active_id],
        player_env_obs[i], player_obs, player_current_observations[i],
        next_observation)

      this_game_data.append(
        ExperienceStep(
          game_id,
          player_current_obs[i],
          player_actions[i],
          player_mapped_actions[active_id],
          player_valid_actions[i],
          player_network_outputs[i],
          # next_obs,  # Dropped out of memory concerns - useful for debugging
          active_id == this_agent_position,  # This agent move?
          active_id,
          episode_step,
          next_halite,
          next_halite - halite_scores[episode_step, active_id],
          np.nan,  # Number of episode steps, overwritten at end of episode
          agent_status == 'INVALID',  # Last episode action
          np.nan,  # Reward, overwritten at the end of the episode
        ))

    for i in range(num_agents):
      agent_status = env.state[i].status
      halite_score = -1 if agent_status == 'INVALID' else env.state[
        0].observation.players[i][0]
      halite_scores[episode_step + 1, i] = halite_score

    episode_step += 1

  # Obtain the terminal rewards for all agents
  halite_scores = halite_scores[:episode_step]
  episode_rewards = get_episode_rewards(halite_scores)

  # Update statistics which cannot be computed before the episode is over.
  for i in range(len(store_transition_ids)):
    this_game_data[-1 - i].last_episode_action = True
  for i in range(len(this_game_data)):
    this_game_data[i].num_episode_steps = episode_step

  episode_duration = time.time() - episode_start_time

  return (this_game_data, episode_rewards, opponent_id, this_agent_position,
          episode_duration)
def get_game_ship_base_loss_count(replay, player_id, game_agent,
                                  process_each_step):
  num_steps = len(replay['steps'])
  prev_units_obs = replay['steps'][0][0]['observation']['players'][player_id]
  destroyed_conversions = 0
  boxed_ship_loss = 0
  shipyard_collision_losses = 0
  ship_loss = 0
  base_loss = 0
  ship_non_boxed_loss_counterfactual = 0
  all_counterfactual_ship_loss = 0
  prev_obs = None
  prev_env_observation = None
  env_configuration = utils.dotdict(replay['configuration'])
  history = {}
  prev_history = -1
  step_times = []
  my_step_durations = np.zeros((400, 8))
  for i in range(num_steps - 1):
    print(i)
    current_units_obs = replay['steps'][i][0]['observation']['players'][
      player_id]
    env_observation = utils.dotdict(replay['steps'][i][0]['observation'])
    env_observation['step'] = i
    env_observation.player = player_id
    obs = utils.structured_env_obs(env_configuration, env_observation,
                                   player_id)
    prev_actions = replay['steps'][i][player_id]['action']
    actions = replay['steps'][i + 1][player_id]['action']

    # Ships that disappeared between consecutive steps: classify how they
    # were lost and replay the counterfactual agent actions where relevant.
    for k in prev_units_obs[2]:
      if not k in current_units_obs[2]:
        prev_pos = prev_units_obs[2][k][0]
        if not prev_pos in current_units_obs[1].values():
          prev_ship_action = get_ship_action(k, prev_actions)
          if prev_ship_action == "CONVERT":
            destroyed_conversions += 1
          else:
            boxed_ship_loss += int(is_boxed_ship_loss(prev_pos, prev_obs))
            ship_loss += 1
            if not boxed_ship_loss:
              if prev_ship_action is not None and base_at_collision_pos(
                  prev_pos, prev_ship_action, replay['steps'][i],
                  replay['steps'][i + 1]):
                print(history['prev_step']['observation']['step'],
                      prev_history['prev_step']['observation']['step'])
                shipyard_collision_losses += 1
              else:
                mapped_actions, _, _, _ = (
                  rule_utils.get_config_or_callable_actions(
                    game_agent, prev_obs, prev_units_obs,
                    prev_env_observation, env_configuration,
                    copy.deepcopy(prev_history)))
                ship_non_boxed_loss_counterfactual += (
                  ship_loss_count_counterfact(
                    mapped_actions, prev_units_obs, obs, debug=False))

    if process_each_step:
      if prev_obs is not None:
        mapped_actions, _, _, _ = (
          rule_utils.get_config_or_callable_actions(
            game_agent, prev_obs, prev_units_obs, prev_env_observation,
            env_configuration, copy.deepcopy(prev_history)))
        all_counterfactual_ship_loss += (
          ship_loss_count_counterfact(mapped_actions, prev_units_obs, obs))

    # Recompute the agent actions for the current step and record how long
    # each part of the step took.
    prev_history = copy.deepcopy(history)
    start_time = time.time()
    current_actions, history, _, step_details = (
      rule_utils.get_config_or_callable_actions(
        game_agent, obs, current_units_obs, env_observation,
        env_configuration, history))
    if step_details is not None:
      step_time = time.time() - start_time
      step_times.append(step_time)
      my_step_durations[i] = np.array([
        step_details['get_actions_duration'],
        step_details['ship_scores_duration'],
        step_details['ship_plans_duration'],
        step_details['ship_map_duration'],
        step_details['inner_loop_ship_plans_duration'],
        step_details['recompute_ship_plan_order_duration'],
        step_details['history_start_duration'],
        step_details['box_in_duration'],
      ])

    # Overwrite the prev actions in history
    try:
      none_included_ship_actions = {
        k: (actions[k] if k in actions else None)
        for k in current_units_obs[2]}
    except:
      # This happens when my submission times out
      import pdb; pdb.set_trace()
      x = 1
    history['prev_step']['my_ship_actions'] = none_included_ship_actions

    for k in prev_units_obs[1]:
      if not k in current_units_obs[1]:
        base_loss += 1

    prev_env_observation = env_observation
    prev_units_obs = current_units_obs
    prev_obs = obs

  return ((destroyed_conversions, boxed_ship_loss, shipyard_collision_losses,
           ship_loss, base_loss, ship_non_boxed_loss_counterfactual,
           all_counterfactual_ship_loss), np.array(step_times),
          my_step_durations)
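# Illustrative follow-up (not in the original code): the per-step duration
# matrix returned above can be summarized to see where agent time is spent.
# The column labels mirror the order of the np.array assembled in the loop.
def _summarize_step_durations(my_step_durations):
  labels = ['get_actions', 'ship_scores', 'ship_plans', 'ship_map',
            'inner_loop_ship_plans', 'recompute_ship_plan_order',
            'history_start', 'box_in']
  totals = my_step_durations.sum(0)
  for label, total in zip(labels, totals):
    print("{}: {:.2f}s".format(label, total))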