def base_at_collision_pos(prev_pos, ship_action, prev_step, step, grid_size=21):
    """Tell whether a ship moved onto a square that was converted this step.

    The ship's destination is derived from ``prev_pos`` and ``ship_action``
    and compared against the previous-step position of every unit whose
    action in ``step`` is "CONVERT".

    Args:
        prev_pos: Flat grid position of the ship before moving.
        ship_action: Action forwarded to ``rule_utils.move_ship_row_col``.
        prev_step: Previous step data; only
            ``prev_step[0]['observation']['players']`` is read.
        step: Sequence with one entry per player, each exposing an
            ``'action'`` mapping from unit key to action string.
        grid_size: Side length of the square grid.

    Returns:
        True if the destination equals the previous position of a unit that
        converted, False otherwise.
    """
    start_row, start_col = utils.row_col_from_square_grid_pos(
        prev_pos, grid_size)
    target_row, target_col = rule_utils.move_ship_row_col(
        start_row, start_col, ship_action, grid_size)
    target_pos = target_row * grid_size + target_col

    for player_id, player_step in enumerate(step):
        player_actions = player_step['action']
        converting_keys = (
            key for key in player_actions
            if player_actions[key] == "CONVERT")
        for key in converting_keys:
            # Look the converting unit up in the PREVIOUS observation: that is
            # where it stood when it turned into a base.
            players = prev_step[0]['observation']['players']
            if players[player_id][2][key][0] == target_pos:
                # We ran into a new established shipyard!
                return True
    return False
def ship_loss_count_counterfact(actions, prev_units, obs, grid_size=21,
                                debug=False):
    """Approximately count the ships I would have lost with ``actions``.

    The count is approximate because it assumes there are no 3-color ship
    collisions. Base spawns are ignored. Converting ships are never counted
    as lost.

    A ship is counted as lost when, after applying its action, it either
      * lands on a square already claimed by one of my earlier ships
        (self collision; only the later ship is counted), or
      * lands on a square where an opponent entry has a truthy value in its
        index-1 grid, or has a ship (index-2 grid) whose index-3 grid value
        is less than or equal to my ship's halite.

    Args:
        actions: Forwarded to ``get_ship_action`` to look up each ship action.
        prev_units: Indexable player record; ``prev_units[2]`` maps ship key
            to a sequence whose item 0 is the flat grid position and item 1
            is the halite on board.
        obs: Mapping with a ``'rewards_bases_ships'`` sequence. All entries
            after the first are treated as opponents; presumably index 1 is
            the base map, 2 the ship map and 3 the ship halite map (confirm
            against the observation builder).
        grid_size: Side length of the square grid.
        debug: Unused; kept for interface compatibility.

    Returns:
        The number of ships counted as lost (int).
    """
    # np.bool was removed in NumPy 1.24; the builtin bool is the same dtype.
    my_ships = np.zeros((grid_size, grid_size), dtype=bool)
    prev_ships = prev_units[2]
    opponents = obs['rewards_bases_ships'][1:]
    ship_loss = 0

    for ship_k, ship_data in prev_ships.items():
        row, col = utils.row_col_from_square_grid_pos(ship_data[0], grid_size)
        action = get_ship_action(ship_k, actions)
        if action == "CONVERT":
            # The ship turns into a base and can not collide with anything.
            continue

        new_row, new_col = rule_utils.move_ship_row_col(
            row, col, action, grid_size)
        if my_ships[new_row, new_col]:
            # Self collision
            ship_loss += 1
            continue

        my_ships[new_row, new_col] = True
        ship_halite = ship_data[1]
        for o in opponents:
            hits_base = o[1][new_row, new_col]
            # Collide with less or equal halite ship
            hits_ship = o[2][new_row, new_col] and (
                o[3][new_row, new_col] <= ship_halite)
            if hits_base or hits_ship:
                ship_loss += 1
                break
    return ship_loss
def get_lost_ships_count(player_mapped_actions, prev_players, current_players,
                         prev_observation, verbose_id=-1):
    """Count, per player, the ships that were lost unintentionally this step.

    A ship that existed in ``prev_players`` but not in ``current_players``
    is counted as lost in one of two situations:

    1. It did not convert, and its destination square
       * held no base in the previous observation,
       * is not occupied by one of the player's own current ships,
       * was not converted into a base by any player this step,
       and ``relative_step`` is below 0.975.
    2. It converted while ``0.025 <= relative_step <= 0.975`` and at least
       one square reached via ``rule_utils.MOVE_DIRECTIONS[1:]`` held an
       opponent ship with less or equal halite (the ship was most likely
       boxed in and forced to convert).

    Args:
        player_mapped_actions: Per player, a dict from unit key to action
            string.
        prev_players: Per player record of the previous step; item 2 maps
            ship key to a sequence of (flat position, halite, ...).
        current_players: Per player record of the current step, same layout.
        prev_observation: Mapping with ``'rewards_bases_ships'`` (per player
            entries whose items 1, 2 and 3 are 2D grids; presumably bases,
            ships and ship halite), ``'relative_step'`` and ``'step'``.
        verbose_id: Player index for which lost ships are printed.

    Returns:
        A float array of length ``len(current_players)`` with the lost ship
        count of every player.
    """
    num_players = len(current_players)
    num_lost_ships = np.zeros(num_players)

    rewards_bases_ships = prev_observation['rewards_bases_ships']
    prev_bases = np.stack([rbs[1] for rbs in rewards_bases_ships]).sum(0) > 0
    grid_size = prev_bases.shape[0]

    # Squares on which any player converted a ship into a base this step.
    new_convert_positions = np.zeros_like(prev_bases)
    for i in range(num_players):
        prev_ships = prev_players[i][2]
        for ship_k, action in player_mapped_actions[i].items():
            if action == "CONVERT":
                new_convert_positions[utils.row_col_from_square_grid_pos(
                    prev_ships[ship_k][0], grid_size)] = 1

    # These grids do not depend on the player, so build them only once.
    prev_stacked_ships = np.stack([rbs[2] for rbs in rewards_bases_ships])
    prev_halite_ships = np.stack([rbs[3] for rbs in rewards_bases_ships]).sum(0)
    # Explicit occupancy mask instead of a negative "no ship" sentinel stored
    # in the halite grid: a sentinel like -1e-9 truncates to 0 for integer
    # grids and would make empty squares look like zero halite ships.
    ship_present = prev_stacked_ships.sum(0) > 0

    for i in range(num_players):
        prev_ships = prev_players[i][2]
        if not prev_ships:
            # A player without ships has nothing to lose.
            continue
        current_ships = current_players[i][2]
        prev_actions = player_mapped_actions[i]
        current_positions = {s[0] for s in current_ships.values()}

        # Loop over all ships and figure out if a ship was lost unintentionally
        for ship_k, ship_data in prev_ships.items():
            if ship_k in current_ships:
                continue

            row, col = utils.row_col_from_square_grid_pos(
                ship_data[0], grid_size)
            # A ship without an explicit action stays where it is (None).
            ship_action = prev_actions.get(ship_k)

            if ship_action != "CONVERT":
                move_row, move_col = rule_utils.move_ship_row_col(
                    row, col, ship_action, grid_size)
                # Don't count base attack/defense ship loss, self collisions,
                # collisions with a new base or losses towards the end of the
                # game.
                lost = (
                    not prev_bases[move_row, move_col]
                    and (move_row * grid_size + move_col
                         not in current_positions)
                    and prev_observation['relative_step'] < 0.975
                    and not new_convert_positions[move_row, move_col])
            else:
                # The ship most likely got boxed in and was forced to convert.
                # Note that this also counts lost ships due to losing the base.
                # Discard strategic base conversions (no nearby opponents)
                lost = False
                if 0.025 <= prev_observation['relative_step'] <= 0.975:
                    ship_halite = ship_data[1]
                    for d in rule_utils.MOVE_DIRECTIONS[1:]:
                        move_row, move_col = rule_utils.move_ship_row_col(
                            row, col, d, grid_size)
                        if ship_present[move_row, move_col] and (
                                prev_halite_ships[move_row, move_col]
                                <= ship_halite):
                            ship_threat_player_id = np.where(
                                prev_stacked_ships[:, move_row, move_col]
                            )[0][0]
                            if ship_threat_player_id != i:
                                lost = True
                                break

            if lost:
                if i == verbose_id:
                    print("Lost ship at step", prev_observation['step'] + 1)
                num_lost_ships[i] += 1

    return num_lost_ships
def decide_not_converted_ship_actions(
        config, observation, player_obs, env_config, not_converted_ship_keys,
        my_next_bases, my_next_ships, obs_halite, verbose,
        convert_first_ship_on_None_action=True, nearby_ship_grid=3):
    """Greedily pick a move for every ship that is not being converted.

    Ships are handled one at a time, in order of decreasing halite on board.
    For each ship every entry of ``rule_utils.MOVE_DIRECTIONS`` is scored
    with a weighted sum of heuristics (halite, own ships, own bases, nearby
    opponent ships) and the best scoring move is taken. The chosen square is
    then marked as a bad position for the ships that follow.

    Args:
        config: Mapping with the heuristic weights read below.
        observation: Mapping with ``'rewards_bases_ships'``; entry 0 is the
            current player and later entries are opponents. Items 1, 2 and 3
            of each entry are 2D grids (bases, ships and ship halite
            according to how they are used here).
        player_obs: Player record; item 0 is the halite budget and item 2
            maps ship key to (flat position, halite, ...).
        env_config: Object exposing ``spawnCost`` and ``convertCost``.
        not_converted_ship_keys: NumPy array of ship keys to decide for.
        my_next_bases: 2D grid of my bases after this step.
        my_next_ships: 2D grid; only used as a shape/dtype template unless
            there are no ships, in which case it is returned unchanged.
        obs_halite: 2D grid with the halite on the map.
        verbose: Print the score components when truthy.
        convert_first_ship_on_None_action: Allow converting a lone ship when
            halite collection is not yet active.
        nearby_ship_grid: Manhattan radius around the destination in which
            opponent ships influence the score.

    Returns:
        Tuple ``(ship_actions, my_next_ships, halite_deposited)``:
        the dict of non-stay actions keyed by ``str(ship_key)``, the grid of
        my ship positions after the moves, and the halite of ships that stay
        on one of my bases.
    """
    ship_actions = {}
    num_ships = len(not_converted_ship_keys)
    halite_deposited = 0
    if not not_converted_ship_keys.size:
        return ship_actions, my_next_ships, halite_deposited

    rewards_bases_ships = observation['rewards_bases_ships']

    # Select ship actions sequentially in the order of the most halite on board
    ship_halite = np.array(
        [player_obs[2][k][1] for k in not_converted_ship_keys])
    ship_keys_ordered_ids = np.argsort(-ship_halite)
    ship_halite = ship_halite[ship_keys_ordered_ids]

    # np.int was removed in NumPy 1.24; the builtin int is the same dtype.
    my_ships = rewards_bases_ships[0][2].astype(int)
    opponent_ships = np.stack(
        [rbs[2] for rbs in rewards_bases_ships[1:]]).sum(0)
    halite_ships = np.stack([rbs[3] for rbs in rewards_bases_ships]).sum(0)

    # Compute values that will be useful in the calculation for all ships
    grid_size = obs_halite.shape[0]
    smoothed_friendly_bases = rule_utils.smooth2d(my_next_bases)
    my_smoothed_ships, ships_kernel = rule_utils.smooth2d(
        my_ships, return_kernel=True)
    smoothed_halite = rule_utils.smooth2d(obs_halite)

    # The neighbourhood around a destination and its distance kernels do not
    # depend on the ship or the move, so compute them once. Each entry is
    # (row offset, col offset, kernel for equal or richer opponents, kernel
    # for poorer opponents). The poorer-opponent kernel uses a distance that
    # is one smaller, so threats weigh in as if they were one step closer.
    nearby_offsets = []
    increments = range(-nearby_ship_grid, nearby_ship_grid + 1)
    for row_increment in increments:
        for col_increment in increments:
            distance_to_new = np.abs(row_increment) + np.abs(col_increment)
            if distance_to_new <= nearby_ship_grid:
                nearby_offsets.append((
                    row_increment,
                    col_increment,
                    1/((distance_to_new+1)**2),
                    1/((max(0, distance_to_new-1)+1)**2),
                ))

    # Select the best option: go to most salient halite, return to base or stay
    # at base.
    my_next_ships = np.zeros_like(my_next_ships)

    # List all positions you definitely don't want to move to. Initially this
    # only contains enemy bases and eventually also earlier ships.
    bad_positions = np.stack(
        [rbs[1] for rbs in rewards_bases_ships])[1:].sum(0)

    # Fixed rule to decide when there should be a focus on collecting halite
    should_collect_halite = my_next_bases.sum() > 0 or num_ships > 1

    for ship_i, ship_k in enumerate(
            not_converted_ship_keys[ship_keys_ordered_ids]):
        row, col = utils.row_col_from_square_grid_pos(
            player_obs[2][ship_k][0], grid_size)
        own_halite = halite_ships[row, col]

        # Subtract the own influence of the ship from my_smoothed_ships
        my_smoothed_ships = rule_utils.add_warped_kernel(
            my_smoothed_ships, -ships_kernel, row, col)

        move_scores = np.zeros(len(rule_utils.MOVE_DIRECTIONS))
        for i, move_dir in enumerate(rule_utils.MOVE_DIRECTIONS):
            new_row, new_col = rule_utils.move_ship_row_col(
                row, col, move_dir, grid_size)

            # Going to an enemy base or to a square already claimed by one of
            # my earlier ships is very bad
            if bad_positions[new_row, new_col]:
                move_scores[i] -= 1e9

            if move_dir is None:
                # Collecting halite is good, if we have a backup base in case
                # the ship gets destroyed
                move_scores[i] += config['halite_collect_constant']*max(
                    0, obs_halite[row, col])*should_collect_halite

            # Going closer to halite is good
            # NOTE(review): this term is positive when the smoothed halite at
            # the destination is LOWER than at the current square; whether
            # that matches the intent depends on the sign of the configured
            # constant - confirm.
            move_scores[i] += config['nearby_halite_move_constant']*(
                smoothed_halite[row, col] - smoothed_halite[new_row, new_col])

            # Moving on top of halite is good, when the collect halite mode is
            # active
            move_scores[i] += config['nearby_onto_halite_move_constant']*(
                obs_halite[new_row, new_col])*should_collect_halite

            # Going closer to my other ships is bad
            move_scores[i] -= config['nearby_ships_move_constant']*(
                my_smoothed_ships[new_row, new_col])

            # Going closer to my bases is good, the more halite I have on the
            # ship
            move_scores[i] += config['nearby_base_move_constant']*(
                smoothed_friendly_bases[new_row, new_col]
            )*ship_halite[ship_i]

            # Going right on top of one of my bases is good, the more halite I
            # have on board of the ship
            move_scores[i] += config['nearby_move_onto_base_constant']*(
                my_next_bases[new_row, new_col])*ship_halite[ship_i]

            # Consider nearby enemy ships within nearby_ship_grid steps of the
            # new position and assign penalty/gain points for possible
            # collisions / approachments as a function of the halite on board
            # of other ships
            for (row_increment, col_increment, near_kernel,
                 threat_kernel) in nearby_offsets:
                other_row = (new_row + row_increment) % grid_size
                other_col = (new_col + col_increment) % grid_size
                if not opponent_ships[other_row, other_col]:
                    continue

                halite_diff = halite_ships[other_row, other_col] - own_halite
                if halite_diff == 0:
                    # Equal halite - impose a penalty of half a spawn cost for
                    # moving closer due to the risk of collision
                    move_scores[i] -= config[
                        'adjacent_opponent_ships_move_constant']*(
                            env_config.spawnCost/2*near_kernel)
                elif halite_diff > 0:
                    # I can steal the opponent's halite - moving closer is
                    # good, proportional to the difference in cargo on board.
                    move_scores[i] += config[
                        'adjacent_opponent_ships_move_constant']*(
                            halite_diff*near_kernel)
                else:
                    # I risk losing my halite - moving closer is bad,
                    # proportional to the difference in cargo on board.
                    move_scores[i] += config[
                        'adjacent_opponent_ships_move_constant']*(
                            halite_diff*threat_kernel)

            if verbose:
                # NOTE(review): the first component printed here is the
                # weighted smoothed halite at the destination, not the
                # difference that is actually added to the score above.
                print(
                    ship_k,
                    move_dir,
                    config['nearby_halite_move_constant']*(
                        smoothed_halite[new_row, new_col]),
                    config['nearby_onto_halite_move_constant']*(
                        obs_halite[new_row, new_col])*should_collect_halite,
                    config['nearby_ships_move_constant']*(
                        my_smoothed_ships[new_row, new_col]),
                    config['nearby_base_move_constant']*(
                        smoothed_friendly_bases[new_row, new_col]
                    )*ship_halite[ship_i],
                    config['nearby_move_onto_base_constant']*(
                        my_next_bases[new_row, new_col])*ship_halite[ship_i]
                )

        if verbose:
            print("Ship {} move scores: {}".format(ship_k, move_scores))

        move_id = np.argmax(move_scores)
        move_dir = rule_utils.MOVE_DIRECTIONS[move_id]
        new_row, new_col = rule_utils.move_ship_row_col(
            row, col, move_dir, grid_size)
        my_next_ships[new_row, new_col] = 1
        bad_positions[new_row, new_col] = 1
        if move_dir is None:
            # Staying put on one of my bases deposits the cargo.
            halite_deposited += (
                ship_halite[ship_i]*my_next_bases[new_row, new_col])
        else:
            ship_actions[str(ship_k)] = str(move_dir)

        # Restore the updated influence of the moved ship to my_smoothed_ships
        if ship_i < (num_ships - 1):
            my_smoothed_ships = rule_utils.add_warped_kernel(
                my_smoothed_ships, ships_kernel, new_row, new_col)

    if not should_collect_halite and convert_first_ship_on_None_action:
        convert_cost = env_config.convertCost
        remaining_budget = player_obs[0]
        if num_ships == 1 and remaining_budget >= convert_cost:
            # A lone ship without any base: turn it into a base when the
            # budget allows, overriding whatever move was selected above.
            ship_actions[str(ship_k)] = rule_utils.CONVERT
    return ship_actions, my_next_ships, halite_deposited