def agent(obs, config):
    # Board used to simulate updates as actions are assigned.
    board = Board(obs, config)
    # Current step of the game.
    step = board.observation['step']
    # Process ships from the highest cargo to the lowest.
    ships = [
        ship.id
        for ship in sorted(board.current_player.ships,
                           key=operator.attrgetter("halite"),
                           reverse=True)
    ]
    actions = {}
    for ship_id in ships:
        if ship_id in board.current_player.ship_ids:
            next_action, action_type = DecisionShip(board, ship_id, step).determine()
            if action_type != 'mine':
                actions[ship_id] = movement_dictionary[action_type]
            board.ships[ship_id].next_action = next_action
            board = board.next()
    shipyard_ids = ShipyardDecisions(board, board.current_player, step).determine()
    for shipyard_id in board.current_player.shipyard_ids:
        if shipyard_id in shipyard_ids:
            actions[shipyard_id] = 'SPAWN'
            board.shipyards[shipyard_id].next_action = ShipyardAction.SPAWN
            board = board.next()
    return actions
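# The agent above references a movement_dictionary defined elsewhere in the
# source. A plausible, purely illustrative shape, mapping DecisionShip action
# types to Halite action names (the real table may differ):
movement_dictionary = {
    'north': 'NORTH',
    'south': 'SOUTH',
    'east': 'EAST',
    'west': 'WEST',
    'convert': 'CONVERT',
}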
def transform_reward(done, last_obs, obs, config):
    board = Board(obs, config)
    me = board.current_player
    nships = len(me.ships)
    nyards = len(me.shipyards)
    halite = me.halite
    cargo = sum(s.halite for s in me.ships)
    if nships == 0:
        if nyards == 0:
            return REWARD_LOST
        if halite < config.spawnCost:
            return REWARD_LOST
    if done:
        # Only players that can still act are ranked.
        scores = [p.halite for p in board.players.values()
                  if len(p.ships) > 0
                  or (len(p.shipyards) > 0 and p.halite >= config.spawnCost)]
        if halite == max(scores):
            if scores.count(halite) == 1:
                return REWARD_WON
        return REWARD_LOST
    delta = 0
    if last_obs is not None:
        last_board = Board(last_obs, config)
        last_me = last_board.current_player
        last_nships = len(last_me.ships)
        last_nyards = len(last_me.shipyards)
        last_halite = last_me.halite
        last_cargo = sum(s.halite for s in last_me.ships)
        delta_ships = (nships - last_nships) * config.spawnCost
        delta_yards = (nyards - last_nyards) * (config.convertCost + config.spawnCost)
        delta_halite = halite - last_halite
        delta_cargo = cargo - last_cargo
        delta = delta_ships + delta_yards + delta_halite + delta_cargo
        if nyards == 0:
            delta -= config.convertCost
        if nships == 0:
            delta -= config.spawnCost
        delta = float(np.clip(delta / MAX_DELTA, -1, 1))
    # Small constant survival bonus on top of the normalized delta.
    reward = delta + 0.01
    return reward
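# transform_reward relies on module-level constants defined elsewhere in the
# source. Plausible values, purely illustrative:
REWARD_WON = 1.0    # terminal reward for an uncontested first place
REWARD_LOST = -1.0  # terminal reward for elimination or losing
MAX_DELTA = 5000.0  # normalization scale for the per-step halite delta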
def transform_actions(actions, obs, config):
    next_actions = dict()
    board = Board(obs, config)
    me = board.current_player
    board_cells = sort_cells(board.cells)
    si = 0
    yi = MAX_SHIPS
    for _, c in board_cells.items():
        if c.ship in me.ships and si < MAX_SHIPS:
            i = actions[si]
            ship_action = SHIP_ACTIONS[i]
            si += 1
            if ship_action is not None:
                next_actions[c.ship.id] = ship_action
        if c.shipyard in me.shipyards and yi < MAX_SHIPS + MAX_YARDS:
            i = actions[yi]
            yard_action = YARD_ACTIONS[i]
            yi += 1
            if yard_action is not None:
                next_actions[c.shipyard.id] = yard_action
    return next_actions
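# Hypothetical action tables assumed by transform_actions; the real
# definitions live elsewhere in the source. Indices below MAX_SHIPS address
# ships, the remainder address shipyards.
SHIP_ACTIONS = ["NORTH", "EAST", "SOUTH", "WEST", "CONVERT", None]  # None = mine
YARD_ACTIONS = ["SPAWN", None]  # None = idle
MAX_SHIPS = 32
MAX_YARDS = 8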
def halite_sap():
    base_path = os.path.dirname(os.path.realpath(__file__))
    sample_file = os.path.join(base_path, "../samples/3602885.json")
    with open(sample_file) as f:
        episode_json = json.load(f)
    replay = json.loads(episode_json["replay"])
    team_name = "Stanley Zheng"
    team_idx = replay["info"]["TeamNames"].index(team_name)
    # This step was specifically chosen because it contains a Shipyard.SPAWN action.
    step_idx = 202
    hsap = HaliteStateActionPair(
        board=Board(
            raw_observation=replay['steps'][step_idx - 1][0]['observation'],
            raw_configuration=replay['configuration'],
            next_actions=[
                replay['steps'][step_idx][0]["action"],
                replay['steps'][step_idx][1]["action"]
            ],
        ),
        cur_team_id=team_idx,
    )
    return hsap
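# Usage sketch in a test: the fixture's conversion methods are the same ones
# exercised by load_examples_from_episode further below.
hsap = halite_sap()
state = hsap.to_state_array()
ship_actions, shipyard_actions = hsap.to_action_arrays()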
def agent(obs, config):
    size = config.size
    board = Board(obs, config)
    me = board.current_player
    ship_states = {}
    yard_states = {}
    # Epsilon-greedy threshold decaying from EPS_START toward EPS_END;
    # the exponent must be negative for the threshold to decay over time.
    exp_part = math.exp(-config.steps / EPS_DECAY)
    eps_threshold = EPS_END + (EPS_START - EPS_END) * exp_part
    # A single sample per step: all ships explore (or exploit) together.
    sample = random.random()
    for yard in me.shipyards:
        yard_states[yard.id] = yard.position
    for ship in me.ships:
        ship_states[ship.id] = ship.position
    for ship in me.ships:
        if sample > eps_threshold:
            print("Predicting move")
            current_state = get_state(obs['halite'], ship_states, yard_states, size)
            move = policy_net(current_state).argmax().item()
            ship.next_action = ship_actions[move]
        else:
            print("random move")
            ship.next_action = random.choice(ship_actions)
    return me.next_actions, ship_states, yard_states
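# Hypothetical epsilon-schedule constants, not defined in this section; the
# values mirror a common DQN-style decay and are purely illustrative.
EPS_START = 0.9  # initial exploration rate
EPS_END = 0.05   # floor of the exploration rate
EPS_DECAY = 200  # decay time constant, in steps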
def update_observation_for_shipyard(board: Board, uid, action):
    """Simulate an environment step forward and update the observation.

    https://www.kaggle.com/sam/halite-sdk-overview#Simulating-Actions-(Lookahead)
    """
    shipyard = board.shipyards[uid]
    shipyard.next_action = action
    ret_val = board.next()
    return Observation(ret_val.observation)
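# Usage sketch (obs/config assumed in scope): peek one simulated step ahead
# before committing a yard to SPAWN.
board = Board(obs, config)
yard_id = board.current_player.shipyard_ids[0]
lookahead_obs = update_observation_for_shipyard(board, yard_id, ShipyardAction.SPAWN)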
def agent(obs, config):
    global commander
    if commander is None:
        commander = Commander()
        # Add a property that converts the coordinate system back to top-left (0, 0).
        Point.norm = property(lambda self: from_point(self, config.size))
    board = Board(obs, config)
    commander.update(board, obs)
    return commander.get_next_actions()
def agent(obs, config):
    # Board used to simulate updates as actions are assigned.
    board = Board(obs, config)
    # Current step of the game.
    step = board.observation['step']
    ships = [ship.id for ship in sorted(board.current_player.ships,
                                        key=operator.attrgetter("halite"),
                                        reverse=True)]
    actions = {}
    # It would be absurd to log when I am out of the game.
    if not (len(board.current_player.ships) == 0 and board.current_player.halite < 500):
        log(str(step + 1) + '|-----------------------------------------------------------------------')
    for ship_id in ships:
        if ship_id in board.current_player.ship_ids:
            log('    Pos: ' + str(board.ships[ship_id].position) +
                ', cargo: ' + str(board.ships[ship_id].halite) +
                ', player halite: ' + str(board.current_player.halite))
            next_action, action_type = DecisionShip(board, ship_id, step).determine()
            if action_type != 'mine':
                actions[ship_id] = movement_dictionary[action_type]
            board.ships[ship_id].next_action = next_action
            if step == 200:
                log(board)
            board = board.next()
            if step == 200:
                log(board)
        # else:
        #     log('    Not found')
    shipyard_ids = ShipyardDecisions(board, board.current_player, step).determine()
    for shipyard_id in board.current_player.shipyard_ids:
        if shipyard_id in shipyard_ids:
            actions[shipyard_id] = 'SPAWN'
            board.shipyards[shipyard_id].next_action = ShipyardAction.SPAWN
            board = board.next()
    return actions
def __call__(
    self, obs: Dict[str, Any], configuration: Dict[str, Any]
) -> Dict[Id, str]:
    try:
        # Easier to re-run the agent on downloaded replays; note that seeding
        # might influence other bots too.
        # random.seed(123)
        # np.random.seed(123)  # type: ignore
        board = Board(obs, configuration)
        print(f"Step {board.step}, Player Halite: {board.current_player.halite}")
        state = State.from_board(board)
        context = Context(
            board=board, config=self.config, state=state, memory=self.memory
        )
        missions = self.make_missions(board, context)
        num_limited_missions = self.num_limit_missions(missions)
        resolved_missions = self.resolve_missions(num_limited_missions)
        self.validate_final_missions(resolved_missions, context)
        actions = self.make_actions(resolved_missions)
        affordable_actions = self.select_affordable_actions(actions, context)
        resolved_actions = self.resolve_actions(affordable_actions)
        self.validate_final_actions(resolved_actions, context)
        halite_actions = self.make_halite_actions(resolved_actions)
        self.print_state(
            board=board,
            num_limited_missions=num_limited_missions,
            resolved_missions=resolved_missions,
            resolved_actions=resolved_actions,
        )
        return halite_actions
    except Exception:
        # You can download logs from Kaggle and inspect what went wrong.
        traceback.print_exc(file=sys.stderr)
        raise
def agent(obs, config):
    size = config.size
    board = Board(obs, config)
    me = board.current_player
    booked_positions = []
    if len(me.ships) < 4 and len(me.shipyards) > 0:
        yard = me.shipyards[0]
        if yard.cell.ship is None:
            yard.next_action = ShipyardAction.SPAWN
    if len(me.shipyards) == 0 and len(me.ships) > 0:
        me.ships[0].next_action = ShipAction.CONVERT
    ship_details = {}
    yard_details = {}
    ship_states = {}
    for yard in me.shipyards:
        yard_details[yard.id] = yard.position
    for ship in me.ships:
        ship_details[ship.id] = ship.position
        booked_positions.append(ship.position)
        if ship.next_action is None:
            if ship.halite < 200:
                ship_states[ship.id] = 'COLLECT'
            elif ship.halite > 500:
                ship_states[ship.id] = 'DEPOSIT'
            else:
                # Between the two thresholds: default to collecting so the
                # state lookups below always find an entry.
                ship_states[ship.id] = 'COLLECT'
            if ship_states[ship.id] == 'COLLECT':
                next_action, booked_pt = ship_collect(ship, booked_positions)
                ship.next_action = next_action
                if booked_pt is not None:
                    booked_positions.append(booked_pt)
            if ship_states[ship.id] == 'DEPOSIT':
                next_action, booked_pt = ship_deposit(ship, me.shipyards[0], size, booked_positions)
                ship.next_action = next_action
                if booked_pt is not None:
                    booked_positions.append(booked_pt)
    return me.next_actions, ship_details, yard_details
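# Purely illustrative sketch of the ship_collect helper used above; the real
# implementation is not shown in this section. Idea: move toward the richest
# unbooked neighboring cell, otherwise stay and mine.
def ship_collect(ship, booked_positions):
    neighbors = {
        ShipAction.NORTH: ship.cell.north,
        ShipAction.EAST: ship.cell.east,
        ShipAction.SOUTH: ship.cell.south,
        ShipAction.WEST: ship.cell.west,
    }
    best_action, best_cell = None, ship.cell
    for action, cell in neighbors.items():
        if cell.position in booked_positions:
            continue  # another friendly ship already claimed this cell
        if cell.halite > best_cell.halite:
            best_action, best_cell = action, cell
    if best_action is None:
        return None, None  # a None action means stay and mine
    return best_action, best_cell.position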
def __init__(self, obs, config):
    self.board = Board(obs, config)
    self.board_prev = None
    self.config = self.board.configuration
    self.me = self.board.current_player
    self.dim = config.size
    self.mid = config.size // 2
    self.quadrant_position = self.me.ships[0].position
    self.quadrant_points = self.get_quadrant_points()
    self.harvest_spot_values = None
    self.enemy_ship_points = None
    self.halite_global_mean = None
    self.halite_global_median = None
    self.halite_global_std = None
    self.yardcount = None
    self.prospective_yard = None
    self.action_iter = None
    self.keep_spawning_tripswitch = True
    self.cell_halite_minimum = None
    self.ship_carry_maximum = None  # calculated at the start of each loop
    self.halite_harvest_minimum = None
    self.generate_constants()
def load_examples_from_episode(episode_file, team_name):
    with open(episode_file) as f:
        episode_json = json.load(f)
    replay = json.loads(episode_json["replay"])
    # Determine whether the team of interest is player 0 or 1.
    # Raises an exception if neither team name matches.
    team_idx = replay["info"]["TeamNames"].index(team_name)
    # Start at step 1: each example pairs the previous step's observation
    # with the current step's actions, so step 0 has no predecessor.
    for step_idx, step in enumerate(replay['steps'][1:], start=1):
        hsap = HaliteStateActionPair(
            board=Board(
                raw_observation=replay['steps'][step_idx - 1][0]['observation'],
                raw_configuration=replay['configuration'],
                next_actions=[step[0]["action"], step[1]["action"]],
            ),
            cur_team_id=team_idx,
        )
        state = hsap.to_state_array()
        ship_actions, shipyard_actions = hsap.to_action_arrays()
        yield (state, ship_actions, shipyard_actions)
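# Usage sketch (path and team name are placeholders): materialize one episode
# into stacked training arrays.
examples = list(load_examples_from_episode("episodes/3602885.json", "Stanley Zheng"))
states, ship_actions, shipyard_actions = (np.asarray(x) for x in zip(*examples))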
def __call__(self, observation, configuration):
    board = Board(observation, configuration)
    self.set_board_actions(board)
    return board.current_player.next_actions
def get_single_ship_move(self, ship_id, pos, step_observation,
                         raw_observation, ship_simulated_step_memory):
    done = False
    board = Board(raw_observation, raw_configuration=self.configuration)
    observation = Observation(raw_observation)

    """
    ============
    Take Action
    ============
    """
    converted_observation = self.env.wrap_observation_for_ship_agent(
        obs=Observation(board.observation),
        player=board.observation['player'],
        spos=int(pos),
        uid=ship_id)
    state_vector = converted_observation.flatten()
    if self.ship_frame_stack_len > 1:
        multiframe_state = self.env.get_multiframe_ship_observation(ship_id)
        converted_obs = np.concatenate(multiframe_state, axis=0)
        state_vector = converted_obs.flatten()
    if len(self.env.get_multiframe_ship_observation(ship_id)) == self.ship_frame_stack_len:
        action = self.ship_agent.get_action(state_vector,
                                            step=self.step_number,
                                            game=self.episode_number)
    else:
        # Not enough frames buffered yet: fall back to a random action.
        action = np.random.randint(0, 6)
    self.episode_actions.append(action)
    halite_action = self.env.convert_ship_action_to_halite_enum(action, observation)
    if halite_action and halite_action.name == halite_action.CONVERT.name and \
            observation.players[observation.player][0] < 500:
        # Tried to convert without enough halite.
        halite_action = None
        action = 5
    if halite_action:
        self.actions_for_step[ship_id] = halite_action.name

    # Take action
    try:
        obs_next: Observation = self.env.update_observation_for_ship(
            board, ship_id, halite_action)
    except KeyError as e:
        print('Actions taken')
        print(self.actions_for_step)
        print('Initial board and observation')
        print(step_observation.players[step_observation.player])
        raise e

    # The ship may no longer exist, i.e. it collided with an enemy ship or
    # converted to a shipyard; we would then need the previous position.
    # For now we would use the new position IF it exists, otherwise the old one:
    # next_pos = obs_next.players[observation.player][2].get(ship_id, (None, None))[0]

    """
    ========================
    Prepare for Model Update
    ========================
    """
    if self.training:
        ship_simulated_step_memory[ship_id] = {
            'state': state_vector,
            'action': action,
            'pos': pos,
        }
    action_string = halite_action.name if halite_action else 'None'
    if self.verbose and ((self.step_number % 10) == 0):
        print(
            f"Step {self.step_number}: Action taken {action} | {action_string} for ship {ship_id}, "
            f"reward received N/A | Player state {obs_next.players[observation.player]}")

    # Update the current observation with the simulated step ahead.
    raw_observation = obs_next
    return raw_observation
def get_single_shipyard_move(self, shipyard_id, pos, step_observation,
                             raw_observation, shipyard_temporary_initial_memory):
    configuration = self.configuration
    board = Board(raw_observation, raw_configuration=configuration)
    observation = Observation(raw_observation)
    verbose = self.verbose
    done = False

    # Select action
    converted_observation, is_occupied = self.env.wrap_observation_for_shipyard_agent(
        obs=observation, player=observation.player, spos=pos, uid=shipyard_id)
    state_vector = converted_observation.flatten()
    state_vector: np.ndarray = np.append(state_vector, is_occupied)
    action = self.shipyard_agent.get_action(state_vector,
                                            step=self.step_number,
                                            game=self.episode_number)
    halite_action = self.env.convert_shipyard_action_to_halite_enum(
        action, shipyard_id, observation)
    self.episode_actions.append(halite_action)

    """
    ============
    Take Action
    ============
    """
    obs_next = self.env.update_observation_for_shipyard(board, shipyard_id, halite_action)
    reward = self.env.get_shipyard_reward(
        obs_next,
        self.env.wrap_observation_for_ship_agent(
            obs=obs_next,
            player=obs_next.player,
            spos=pos,  # because shipyards can't move
            uid=shipyard_id),
        uid=shipyard_id,
        done=done)
    self.episode_rewards.append(reward)

    """
    ========================
    Prepare for Model Update
    ========================
    """
    if self.training:
        shipyard_temporary_initial_memory[shipyard_id] = {
            'state': state_vector,
            'action': action,
            'pos': pos,
            'is_occupied': is_occupied,
        }
    if verbose and ((self.step_number % 10) == 0):
        print(f"Step {self.step_number}: Action taken {action} for shipyard {shipyard_id}, "
              f"reward received {reward}")

    # Update the current observation with the simulated step ahead.
    raw_observation = obs_next
    return raw_observation
def get_single_shipyard_move(
    self,
    shipyard_id,
    pos,
    step_observation,
    raw_observation,
    shipyard_temporary_initial_memory,
    step_number=0,
    episode_number=0,
):
    configuration = self.configuration
    board = Board(raw_observation, raw_configuration=configuration)
    observation = Observation(raw_observation)
    verbose = self.verbose
    done = False

    # Select action
    converted_observation = self.shipyard_state_wrapper.get_basic_single_frame_complete_observation(
        obs=observation, player=observation.player, sy_pos=pos, uid=shipyard_id)
    state_vector = converted_observation

    player_state = step_observation.players[step_observation.player]
    if len(player_state[2]) == 0 and player_state[0] > 500:
        # No ships left but enough halite: force a SPAWN.
        action = 1
    else:
        action = self.shipyard_agent.get_action(state_vector,
                                                step=step_number,
                                                game=episode_number)
    halite_action = self.shipyard_state_wrapper.convert_action_to_enum(
        shipyard_id, observation, action)
    if halite_action:
        self.actions_for_step[shipyard_id] = halite_action.name

    """
    ============
    Take Action
    ============
    """
    obs_next = self.env.update_observation_for_shipyard(board, shipyard_id, halite_action)
    reward = self.env.get_shipyard_reward(
        obs_next,
        self.env.wrap_observation_for_shipyard_agent(
            obs=obs_next,
            player=obs_next.player,
            spos=pos,  # because shipyards can't move
            uid=shipyard_id),
        uid=shipyard_id,
        done=done)

    """
    ========================
    Prepare for Model Update
    ========================
    """
    is_occupied = state_vector[-2]
    if self.training:
        shipyard_temporary_initial_memory[shipyard_id] = {
            'state': state_vector,
            'action': action,
            'pos': pos,
            'is_occupied': is_occupied,
        }
    if verbose and ((step_number % 10) == 0):
        print(f"Step {step_number}: Action taken {action} for shipyard {shipyard_id}, "
              f"reward received {reward}")

    # Update the current observation with the simulated step ahead.
    raw_observation = obs_next
    return raw_observation
def get_actions(self, obs, config):
    """Main loop"""
    self.board = Board(obs, config)
    self.me = self.board.current_player
    me = self.me  # just for shorthand
    spawncount = 0
    self.refresh_ships()
    self.yardcount = len(self.me.shipyards)
    self.setup_stats()

    # Main ship loop - iterate until each ship has an action.
    # TODO - ships on a shipyard should go first.
    self.action_iter = 0
    while len(LOG.set_actions) != len(me.ships):
        self.action_iter += 1
        if self.action_iter > 24:
            raise BaseException("action resolution iteration > 24 - probable infinite loop")
        if self.action_iter % 10 == 0:
            print(f"Action Iter:{self.action_iter}")

        # If no yards, create one and mark the point.
        p0sy = [sy for sy in self.board.shipyards.values() if sy.player_id == 0]
        p0sy = p0sy[0].position if len(p0sy) > 0 else Point(5, 15)
        if len(self.me.shipyards) == 0 or any(
                self.dist(s.position, p0sy) <= 2 for s in self.me.ships):
            ship = self.get_best_ship_for_yard()
            ship.next_action = ShipAction.CONVERT
            ship.log.set_action = ShipAction.CONVERT
            # Conversion is resolved before collision - we don't need to
            # reserve the point with log.set_point.
            ship.log.p_point = None
            self.prospective_yard = Shipyard('PROSPECTIVE', ship.position, self.me.id, self.board)

        # Calculate the best potential action for each undecided ship.
        for ship in [s for s in me.ships if s.log.set_action is None]:
            self.determine_ship_action(ship)

        # Confirm non-conflicting actions. Record set actions in the ship log
        # to keep track of how many ship actions are finalized.
        p2s = LOG.p_point2ship  # point-to-ship map, excluding set ships
        for point, ships in p2s.items():
            if len(ships) == 1:
                # Only contender - give it the action.
                ship = ships[0]
                action, point = ship.log.p_action, ship.log.p_point
                ship.next_action = action if action != 'WAIT' else None
                ship.log.set_action, ship.log.set_point = action, point
                # Once a ship's action is set, its point is no longer a
                # possibility for the others.
            else:
                # Give the spot to the highest-priority ship (currently the
                # one carrying the most halite).
                ships_by_halite = sorted([(s, s.halite) for s in ships], key=lambda x: -x[1])
                priority_ship, halite = ships_by_halite[0]
                action, point = priority_ship.log.p_action, priority_ship.log.p_point
                priority_ship.next_action = action if action != 'WAIT' else None
                priority_ship.log.set_action, priority_ship.log.set_point = action, point

    # Ship building
    h2ns = [(p.halite, len(p.ships)) for p in self.board.players.values() if p.id != me.id]
    nships_other = sorted(h2ns, key=lambda x: -x[0])[0][1]
    should_still_spawn = ((len(me.ships) <= nships_other) or (obs.step < 20)) \
        and (obs.step < 360)
    reserve = config.convertCost if obs.step > 20 else 0
    for shipyard in me.shipyards:
        # Can we afford a spawn, accounting for other shipyards' spawns this
        # turn and keeping a reserve for one conversion?
        have_enough_halite = (me.halite - spawncount * config.spawnCost - reserve) >= config.spawnCost
        no_ship_reserved_point = shipyard.position not in LOG.set_points
        # NOTE: the gate below currently uses only the halite threshold; the
        # flags computed above are not yet applied.
        if self.me.halite > 1000:
            shipyard.next_action = ShipyardAction.SPAWN
            spawncount += 1

    self.board_prev = self.board
    return me.next_actions
def transform_observation(obs, config):
    board = Board(obs, config)
    me = board.current_player
    board_cells = sort_cells(board.cells)
    step = []
    cell_yield = []
    me_yard = []
    me_ship = []
    me_ship_cargo = []
    opp_yard = []
    opp_ship = []
    opp_ship_cargo = []
    for _, c in board_cells.items():
        step.append(obs['step'] / config.episodeSteps)
        cell_yield.append(c.halite / config.maxCellHalite)
        if c.ship is None:
            me_ship.append(0)
            me_ship_cargo.append(0)
            opp_ship.append(0)
            opp_ship_cargo.append(0)
        elif c.ship in me.ships:
            me_ship.append(1)
            me_ship_cargo.append(c.ship.halite / MAX_SHIP_HALITE)
            opp_ship.append(0)
            opp_ship_cargo.append(0)
        else:
            me_ship.append(0)
            me_ship_cargo.append(0)
            opp_ship.append(1)
            opp_ship_cargo.append(c.ship.halite / MAX_SHIP_HALITE)
        if c.shipyard is None:
            me_yard.append(0)
            opp_yard.append(0)
        elif c.shipyard in me.shipyards:
            me_yard.append(1)
            opp_yard.append(0)
        else:
            me_yard.append(0)
            opp_yard.append(1)
    x_obs = np.vstack((step, cell_yield, me_yard, me_ship, me_ship_cargo,
                       opp_yard, opp_ship, opp_ship_cargo))
    # vstack yields shape (N_FEATURES, size * size); reshape feature-first and
    # move the feature axis last so each cell keeps its own feature vector.
    x_obs = x_obs.reshape(N_FEATURES, config.size, config.size).transpose(1, 2, 0)
    x_obs = x_obs.astype(np.float32).clip(0, 1)
    return x_obs
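# Usage sketch: the encoder produces one board-shaped, channel-last tensor per
# step, suitable as network input (obs/config assumed in scope).
x_obs = transform_observation(obs, config)
assert x_obs.shape == (config.size, config.size, N_FEATURES)
assert x_obs.dtype == np.float32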
def reset(self):
    """Reset trainer environment"""
    self.observation = self.trainer.reset()
    board = Board(self.observation, self.environment.configuration)
    return self.observation, board
def play_episode(env: HaliteEnv,
                 ship_agent: Agent,
                 shipyard_agent: Agent,
                 configuration,
                 n_steps: int = 10,
                 verbose: bool = True,
                 training: bool = False,
                 simulated_step_learning: bool = False,
                 episode_number=0):
    episode_rewards = []
    episode_actions = []
    episode_scores = []
    raw_observation: dict = env.reset()[0].__dict__
    print('ep: {}'.format(episode_number))
    done = False
    for step_num in range(n_steps):
        if done:
            board = Board(raw_observation, raw_configuration=configuration)
            print('Done')
            print(board)
            return episode_scores
        actions_for_step = {}
        # Won't change during the simulated sub-steps below.
        step_observation = Observation(raw_observation)
        shipyard_temporary_initial_memory = {}
        ship_temporary_initial_memory = {}

        """
        ====================================
        SHIPYARDS
        ====================================
        """
        for shipyard_id, pos in step_observation.players[step_observation.player][1].items():
            # Will change at each simulated step.
            board = Board(raw_observation, raw_configuration=configuration)
            observation = Observation(raw_observation)

            # Select action
            converted_observation, is_occupied = env.wrap_observation_for_shipyard_agent(
                obs=observation, player=observation.player, spos=pos, uid=shipyard_id)
            state_vector = converted_observation.flatten()
            state_vector: np.ndarray = np.append(state_vector, is_occupied)
            action = shipyard_agent.get_action(state_vector, step=step_num, game=episode_number)
            halite_action = env.convert_shipyard_action_to_halite_enum(
                action, shipyard_id, observation)
            episode_actions.append(halite_action)

            # Re-align the integer action with the halite action.
            # TODO: should refactor
            if halite_action == ShipyardAction.SPAWN:
                action = 1
            else:
                action = 0
            if halite_action:
                actions_for_step[shipyard_id] = halite_action.name

            """
            ============
            Take Action
            ============
            """
            prev_obs = observation
            obs_next = env.update_observation_for_shipyard(board, shipyard_id, halite_action)
            reward = env.get_shipyard_reward(
                obs_next,
                env.wrap_observation_for_ship_agent(
                    obs=obs_next,
                    player=obs_next.player,
                    spos=pos,  # because shipyards can't move
                    uid=shipyard_id),
                uid=shipyard_id,
                done=done)
            episode_rewards.append(reward)

            """
            ============
            Update Model
            ============
            """
            converted_next_obs, is_occupied_next = env.wrap_observation_for_shipyard_agent(
                obs_next, obs_next.player, spos=pos, uid=shipyard_id)
            next_state_vector = converted_next_obs.flatten()
            next_state_vector: np.ndarray = np.append(next_state_vector, is_occupied_next)
            if training:
                if simulated_step_learning:
                    shipyard_agent.remember(state=state_vector,
                                            action=action,
                                            reward=reward,
                                            new_state=next_state_vector,
                                            done=done)
                    shipyard_agent.learn(step_num=step_num, episode_num=episode_number)
                else:
                    shipyard_temporary_initial_memory[shipyard_id] = {
                        'state': state_vector,
                        'action': action,
                        'pos': pos,
                        'is_occupied': is_occupied,
                    }
            if verbose and ((step_num % 5) == 0):
                print(f"Step {step_num}: Action taken {action} for shipyard {shipyard_id}, "
                      f"reward received {reward}")
            # Update the current observation with the simulated step ahead.
            raw_observation = obs_next

        """
        ====================================
        SHIPS
        ====================================
        """
        for ship_id, (pos, halite) in step_observation.players[step_observation.player][2].items():
            # Will change at each simulated step.
            board = Board(raw_observation, raw_configuration=configuration)
            observation = Observation(raw_observation)

            """
            ============
            Take Action
            ============
            """
            converted_observation = env.wrap_observation_for_ship_agent(
                obs=Observation(board.observation),
                player=board.observation['player'],
                spos=int(pos),
                uid=ship_id)
            state_vector = converted_observation.flatten()
            action = ship_agent.get_action(state_vector, step=step_num, game=episode_number)
            episode_actions.append(action)
            halite_action = env.convert_ship_action_to_halite_enum(action, observation)
            if halite_action and halite_action.name == halite_action.CONVERT.name and \
                    observation.players[observation.player][0] < 500:
                # Tried to convert without enough halite.
                halite_action = None
                action = 5
            if halite_action:
                actions_for_step[ship_id] = halite_action.name

            # Take action
            prev_obs = observation
            try:
                obs_next: Observation = env.update_observation_for_ship(
                    board, ship_id, halite_action)
            except KeyError as e:
                print('Actions taken')
                print(actions_for_step)
                print('Current board and observation')
                print(board.ships.keys())
                print(observation.players[observation.player])
                print('Initial board and observation')
                print(step_observation.players[step_observation.player])
                raise e

            # The ship may no longer exist (it collided with an enemy ship or
            # converted to a shipyard): use the new position if it exists,
            # otherwise fall back to the old one.
            next_pos = obs_next.players[observation.player][2].get(ship_id, (None, None))[0]
            if not next_pos:
                next_pos = int(pos)
            reward = env.get_collector_ship_reward(
                obs_next,
                env.wrap_observation_for_ship_agent(
                    obs=obs_next,
                    player=obs_next.player,
                    spos=pos,  # position before the simulated move
                    uid=ship_id),
                ship_id,
                done=done)
            episode_rewards.append(reward)

            """
            ============
            Update Model
            ============
            """
            converted_next_obs = env.wrap_observation_for_ship_agent(
                obs=obs_next, player=obs_next.player, spos=next_pos, uid=ship_id)
            next_state_vector = converted_next_obs.flatten()
            if training:
                if simulated_step_learning:
                    ship_agent.remember(state=state_vector,
                                        action=action,
                                        reward=reward,
                                        new_state=next_state_vector,
                                        done=done)
                    ship_agent.learn(step_num=step_num, episode_num=episode_number)
                else:
                    ship_temporary_initial_memory[ship_id] = {
                        'state': state_vector,
                        'action': action,
                        'pos': pos,
                    }
            action_string = halite_action.name if halite_action else 'None'
            if verbose and ((step_num % 5) == 0):
                print(f"Step {step_num}: Action taken {action} | {action_string} for ship {ship_id}, "
                      f"reward received {reward}")
            # Update the current observation with the simulated step ahead.
            raw_observation = obs_next

        """
        ================
        Take Step
        ================
        """
        # Updates env.observation.
        step_results = env.step(actions=actions_for_step)
        print('Actions for step')
        print(actions_for_step)
        observation, game_reward, terminal = step_results
        done = terminal  # ends the episode loop on the next iteration
        if not simulated_step_learning:
            """
            Here we are doing learning after the actual "step" has taken place.
            This means that the earlier a ship or shipyard selected its move,
            the more unknowns and "friendly reactions" could occur afterwards.
            It would probably be very useful to include
              - remaining_ship_actions
              - remaining_shipyard_actions
              - and potentially the current epsilon value
            as part of the state.
            """
            # Computed for potential reward shaping; currently unused.
            player_halite = observation.players[observation.player][0]
            opponent_halites = [item[0] for item in observation.players[observation.player:]]
            best_opponent_halite = sorted(opponent_halites, reverse=True)[0]

            for ship_id, val in ship_temporary_initial_memory.items():
                s = val['state']
                a = val['action']
                pos = val['pos']
                converted_next_obs = env.wrap_observation_for_ship_agent(
                    obs=Observation(observation),
                    player=observation['player'],
                    spos=int(pos),
                    uid=ship_id)
                ship_reward = env.get_collector_ship_reward(
                    observation=observation,
                    converted_observation=converted_next_obs,
                    uid=ship_id,
                    done=done)
                next_state_vector = converted_next_obs.flatten()
                ship_agent.remember(state=s,
                                    action=a,
                                    reward=ship_reward,
                                    new_state=next_state_vector,
                                    done=done)
                ship_agent.learn(step_num=step_num, episode_num=episode_number)

            for shipyard_id, val in shipyard_temporary_initial_memory.items():
                s = val['state']
                a = val['action']
                pos = val['pos']
                is_occupied = val['is_occupied']
                converted_next_obs, is_occupied_next = env.wrap_observation_for_shipyard_agent(
                    obs=Observation(observation),
                    player=observation['player'],
                    spos=int(pos),
                    uid=shipyard_id)
                print('For action: {}'.format(a))
                shipyard_reward = env.get_shipyard_count_reward(
                    observation=observation,
                    converted_observation=converted_next_obs)
                next_state_vector = converted_next_obs.flatten()
                next_state_vector: np.ndarray = np.append(next_state_vector, is_occupied_next)
                shipyard_agent.remember(state=s,
                                        action=a,
                                        reward=shipyard_reward,
                                        new_state=next_state_vector,
                                        done=done)
                shipyard_agent.learn(step_num=step_num, episode_num=episode_number)

        episode_scores.append([item[0] for item in observation['players']])
        raw_observation = observation
    return episode_scores
def _initialize_new_board(self):
    env = make("halite", self._configuration)
    obs = env.reset(self._num_players)
    return Board(raw_observation=obs[0]["observation"],
                 raw_configuration=self._configuration)
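# Imports assumed by the helper above (standard kaggle_environments API):
from kaggle_environments import make
from kaggle_environments.envs.halite.helpers import Board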