Example #1
def agent(obs, config):
    # Build the board; board.next() re-creates it after each simulated action
    board = Board(obs, config)

    # Current step number
    step = board.observation['step']

    ships = [
        ship.id for ship in sorted(board.current_player.ships,
                                   key=operator.attrgetter("halite"),
                                   reverse=True)
    ]
    actions = {}

    for ship_id in ships:
        if ship_id in board.current_player.ship_ids:
            next_action, action_type = DecisionShip(board, ship_id,
                                                    step).determine()

            if action_type != 'mine':
                actions[ship_id] = movement_dictionary[action_type]
                board.ships[ship_id].next_action = next_action
                board = board.next()

    shipyard_ids = ShipyardDecisions(board, board.current_player,
                                     step).determine()

    for shipyard_id in board.current_player.shipyard_ids:
        if shipyard_id in shipyard_ids:
            actions[shipyard_id] = 'SPAWN'
            board.shipyards[shipyard_id].next_action = ShipyardAction.SPAWN

            board = board.next()

    return actions
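
The board.next() calls above use the Halite SDK's lookahead idiom: assign a next_action, then re-create the board one simulated step ahead so later ships see the updated state. A minimal, self-contained sketch of that idiom (the fixed NORTH move is a placeholder decision, not the logic of this example):

from kaggle_environments import make
from kaggle_environments.envs.halite.helpers import Board, ShipAction

env = make("halite")
obs = env.reset(2)[0]["observation"]
board = Board(obs, env.configuration)

for ship_id in list(board.current_player.ship_ids):
    if ship_id in board.ships:  # the ship may have died in a simulated step
        board.ships[ship_id].next_action = ShipAction.NORTH  # placeholder
        board = board.next()    # simulate one step so later ships see the result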
Example #2
def transform_reward(done, last_obs, obs, config):
    board = Board(obs, config)
    me = board.current_player

    nships = len(me.ships)
    nyards = len(me.shipyards)
    halite = me.halite
    cargo = sum(s.halite for s in me.ships)

    if nships == 0:
        if nyards == 0:
            return REWARD_LOST

        if halite < config.spawnCost:
            return REWARD_LOST

    if done:
        scores = [p.halite for p in board.players.values() if
                  len(p.ships) > 0 or
                  (len(p.shipyards) > 0 and p.halite >= config.spawnCost)]

        if halite == max(scores):
            if scores.count(halite) == 1:
                return REWARD_WON
        return REWARD_LOST

    delta = 0

    if last_obs is not None:
        last_board = Board(last_obs, config)
        last_me = last_board.current_player

        last_nships = len(last_me.ships)
        last_nyards = len(last_me.shipyards)
        last_halite = last_me.halite
        last_cargo = sum(s.halite for s in last_me.ships)

        delta_ships = (nships - last_nships) * config.spawnCost
        delta_yards = (nyards - last_nyards) * (config.convertCost + config.spawnCost)
        delta_halite = halite - last_halite
        delta_cargo = cargo - last_cargo

        delta = delta_ships + delta_yards + delta_halite + delta_cargo

        if nyards == 0:
            delta -= config.convertCost

        if nships == 0:
            delta -= config.spawnCost

        delta = float(np.clip(delta / MAX_DELTA, -1, 1))

    reward = delta + 0.01
    return reward
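
A sketch of how a shaped reward like transform_reward might be consumed with the environment's train() helper, assuming the REWARD_* and MAX_DELTA constants used above are defined; the empty actions dict stands in for a real policy:

from kaggle_environments import make

env = make("halite")
trainer = env.train([None, "random"])  # we control player 0

obs, done = trainer.reset(), False
while not done:
    actions = {}  # placeholder: would come from a policy
    next_obs, _, done, _ = trainer.step(actions)
    reward = transform_reward(done, obs, next_obs, env.configuration)
    obs = next_obs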
Example #3
def transform_actions(actions, obs, config):
    next_actions = dict()

    board = Board(obs, config)
    me = board.current_player

    board_cells = sort_cells(board.cells)

    si = 0
    yi = MAX_SHIPS

    for _, c in board_cells.items():
        if c.ship in me.ships and si < MAX_SHIPS:
            i = actions[si]
            ship_action = SHIP_ACTIONS[i]
            si += 1

            if ship_action is not None:
                next_actions[c.ship.id] = ship_action

        if c.shipyard in me.shipyards and yi < MAX_SHIPS + MAX_YARDS:
            i = actions[yi]
            yard_action = YARD_ACTIONS[i]
            yi += 1

            if yard_action is not None:
                next_actions[c.shipyard.id] = yard_action

    return next_actions
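
transform_actions assumes a flat action vector: indices [0, MAX_SHIPS) select ship actions and [MAX_SHIPS, MAX_SHIPS + MAX_YARDS) select shipyard actions, applied to units in sorted-cell order. The lookup tables are not shown; a plausible sketch (the sizes and the index-0-means-noop convention are assumptions):

from kaggle_environments.envs.halite.helpers import ShipAction, ShipyardAction

MAX_SHIPS, MAX_YARDS = 10, 5  # assumed capacity of the flat action vector

SHIP_ACTIONS = [None, ShipAction.NORTH, ShipAction.EAST,
                ShipAction.SOUTH, ShipAction.WEST, ShipAction.CONVERT]
YARD_ACTIONS = [None, ShipyardAction.SPAWN]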
Example #4
def halite_sap():
    base_path = os.path.dirname(os.path.realpath(__file__))
    sample_file = os.path.join(base_path, "../samples/3602885.json")
    with open(sample_file) as f:
        episode_json = json.load(f)

    replay = json.loads(episode_json["replay"])

    team_name = "Stanley Zheng"
    team_idx = replay["info"]["TeamNames"].index(team_name)

    # This step was specifically chosen because it contains a Shipyard.SPAWN action.
    step_idx = 202
    hsap = HaliteStateActionPair(
        board=Board(
            raw_observation=replay['steps'][step_idx - 1][0]['observation'],
            raw_configuration=replay['configuration'],
            next_actions=[
                replay['steps'][step_idx][0]["action"],
                replay['steps'][step_idx][1]["action"]
            ],
        ),
        cur_team_id=team_idx,
    )
    return hsap
Example #5
def agent(obs, config):
    size = config.size
    board = Board(obs, config)
    me = board.current_player

    ship_states = {}
    yard_states = {}
    # Epsilon decays toward EPS_END as the episode progresses (standard DQN schedule)
    exp_part = math.exp(-obs['step'] / EPS_DECAY)
    eps_threshold = EPS_END + (EPS_START - EPS_END) * exp_part

    sample = random.random()
    for yard in me.shipyards:
        yard_states[yard.id] = yard.position

    for ship in me.ships:
        ship_states[ship.id] = ship.position

    for ship in me.ships:
        if sample > eps_threshold:
            print("Predicting move")
            current_state = get_state(obs['halite'], ship_states, yard_states,
                                      size)
            move = policy_net(current_state).argmax().item()
            ship.next_action = ship_actions[move]
        else:
            print("random move")
            ship.next_action = random.choice(ship_actions)

    return me.next_actions, ship_states, yard_states
Example #6
def update_observation_for_shipyard(board: Board, uid, action):
    """Simulate one environment step forward and return the updated observation.
    https://www.kaggle.com/sam/halite-sdk-overview#Simulating-Actions-(Lookahead)
    """
    shipyard = board.shipyards[uid]
    shipyard.next_action = action
    ret_val = board.next()
    return Observation(ret_val.observation)
Example #7
def agent(obs, config):
    global commander
    if commander is None:
        commander = Commander()
    # add method to convert coordinate system back to topleft 0,0
    Point.norm = property(lambda self: from_point(self, config.size))

    board = Board(obs, config)
    commander.update(board, obs)
    return commander.get_next_actions()
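
from_point is not shown; given that the SDK's Point uses a bottom-left origin while raw observations are indexed from the top left, a likely implementation is a y-axis flip:

from kaggle_environments.envs.halite.helpers import Point

def from_point(p: Point, size: int) -> Point:
    # Assumed: flip y to convert SDK (bottom-left origin) coordinates
    # back to the raw observation's top-left origin.
    return Point(p.x, size - p.y - 1)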
Example #8
def agent(obs, config):
    # Build the board; board.next() re-creates it after each simulated action
    board = Board(obs, config)

    # Current step number
    step = board.observation['step']

    ships = [ship.id for ship in sorted(board.current_player.ships, key=operator.attrgetter("halite"), reverse=True)]
    actions = {}

    # No point logging once we are effectively out of the game
    if not (len(board.current_player.ships) == 0 and board.current_player.halite < 500):
        log(str(step + 1) + '|-----------------------------------------------------------------------')

    for ship_id in ships:
        if ship_id in board.current_player.ship_ids:
            log(' Pos:' + str(board.ships[ship_id].position) + ', cargo: ' + str(board.ships[ship_id].halite) + ', player halite: ' + str(board.current_player.halite))
                
            next_action, action_type = DecisionShip(board, ship_id, step).determine()
                
            if action_type != 'mine':
                actions[ship_id] = movement_dictionary[action_type]
                board.ships[ship_id].next_action = next_action
                if step == 200: log(board)
                board = board.next()
                if step == 200: log(board)
        # else:
        #     log(' Not found')

    shipyard_ids = ShipyardDecisions(board, board.current_player, step).determine()

    for shipyard_id in board.current_player.shipyard_ids:
        if shipyard_id in shipyard_ids:
            actions[shipyard_id] = 'SPAWN'
            board.shipyards[shipyard_id].next_action = ShipyardAction.SPAWN
            
            board = board.next()
        
    return actions
Example #9
    def __call__(
        self, obs: Dict[str, Any], configuration: Dict[str, Any]
    ) -> Dict[Id, str]:
        try:
            # easier to re-run agent on downloaded replays; note that this might influence other bots too
            # random.seed(123)
            # np.random.seed(123)  # type: ignore
            board = Board(obs, configuration)

            print(f"Step {board.step}, Player Halite: {board.current_player.halite}")

            state = State.from_board(board)

            context = Context(
                board=board, config=self.config, state=state, memory=self.memory
            )

            missions = self.make_missions(board, context)

            num_limited_missions = self.num_limit_missions(missions)

            resolved_missions = self.resolve_missions(num_limited_missions)

            self.validate_final_missions(resolved_missions, context)

            actions = self.make_actions(resolved_missions)

            affordable_actions = self.select_affordable_actions(actions, context)

            resolved_actions = self.resolve_actions(affordable_actions)

            self.validate_final_actions(resolved_actions, context)

            halite_actions = self.make_halite_actions(resolved_actions)

            self.print_state(
                board=board,
                num_limited_missions=num_limited_missions,
                resolved_missions=resolved_missions,
                resolved_actions=resolved_actions,
            )

            return halite_actions
        except Exception:
            traceback.print_exc(
                file=sys.stderr
            )  # you can download logs from Kaggle and inspect what went wrong
            raise
Example #10
def agent(obs, config):
    size = config.size
    board = Board(obs, config)
    me = board.current_player
    booked_positions = []

    if len(me.ships) < 4 and len(me.shipyards) > 0:
        yard = me.shipyards[0]
        if yard.cell.ship is None:
            yard.next_action = ShipyardAction.SPAWN

    if len(me.shipyards) == 0 and len(me.ships) > 0:
        me.ships[0].next_action = ShipAction.CONVERT

    ship_details = {}
    yard_details = {}
    ship_states = {}

    for yard in me.shipyards:
        yard_details[yard.id] = yard.position

    for ship in me.ships:
        ship_details[ship.id] = ship.position
        booked_positions.append(ship.position)
        if ship.next_action is None:
            if ship.halite < 200:
                ship_states[ship.id] = 'COLLECT'
            elif ship.halite > 500:
                ship_states[ship.id] = 'DEPOSIT'
            else:
                # Mid-range cargo: default to COLLECT so the lookups below
                # cannot fail with a KeyError.
                ship_states.setdefault(ship.id, 'COLLECT')

            if ship_states[ship.id] == 'COLLECT':
                next_action, booked_pt = ship_collect(ship, booked_positions)
                ship.next_action = next_action
                if booked_pt is not None:
                    booked_positions.append(booked_pt)
            if ship_states[ship.id] == 'DEPOSIT':
                next_action, booked_pt = ship_deposit(ship, me.shipyards[0],
                                                      size, booked_positions)
                ship.next_action = next_action
                if booked_pt is not None:
                    booked_positions.append(booked_pt)

    return me.next_actions, ship_details, yard_details
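
The ship_collect and ship_deposit helpers are not shown. A hypothetical minimal ship_collect, just to make the (action, booked point) contract concrete; the board size default is an assumption:

import random
from kaggle_environments.envs.halite.helpers import ShipAction

def ship_collect(ship, booked_positions, size=21):  # size is assumed
    # Hypothetical: mine in place if the cell has halite, else take a
    # random neighboring cell that no other ship has booked.
    if ship.cell.halite > 0:
        return None, None  # a None action means stay and mine
    moves = [ShipAction.NORTH, ShipAction.EAST, ShipAction.SOUTH, ShipAction.WEST]
    random.shuffle(moves)
    for move in moves:
        target = ship.position.translate(move.to_point(), size)
        if target not in booked_positions:
            return move, target
    return None, None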
Example #11
    def __init__(self, obs, config):
        self.board = Board(obs, config)
        self.board_prev = None
        self.config = self.board.configuration
        self.me = self.board.current_player
        self.dim = config.size
        self.mid = config.size // 2
        self.quadrant_position = self.me.ships[0].position
        self.quadrant_points = self.get_quadrant_points()
        self.harvest_spot_values = None
        self.enemy_ship_points = None
        self.halite_global_mean = None
        self.halite_global_median = None
        self.halite_global_std = None
        self.yardcount = None
        self.prospective_yard = None
        self.action_iter = None
        self.keep_spawning_tripswitch = True
        self.cell_halite_minimum = None
        self.ship_carry_maximum = None  # calculated at start of each loop
        self.halite_harvest_minimum = None
        self.generate_constants()
Example #12
def load_examples_from_episode(episode_file, team_name):
    with open(episode_file) as f:
        episode_json = json.load(f)

    replay = json.loads(episode_json["replay"])

    # Determine whether team of interest is player 0 or 1.
    # Raises exception if neither team name matches.
    team_idx = replay["info"]["TeamNames"].index(team_name)

    # Skip step 0: its actions have no preceding observation to pair with.
    for step_idx, step in enumerate(replay['steps'][1:], start=1):
        hsap = HaliteStateActionPair(
            board=Board(
                raw_observation=replay['steps'][step_idx - 1][0]['observation'],
                raw_configuration=replay['configuration'],
                next_actions=[step[0]["action"], step[1]["action"]],
            ),
            cur_team_id=team_idx,
        )
        state = hsap.to_state_array()
        ship_actions, shipyard_actions = hsap.to_action_arrays()
        yield (state, ship_actions, shipyard_actions)
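
A sketch of consuming the generator to build training arrays; the file path and team name are placeholders reused from the sample above:

import numpy as np

states, ship_acts, yard_acts = [], [], []
for state, ship_actions, shipyard_actions in load_examples_from_episode(
        "../samples/3602885.json", "Stanley Zheng"):
    states.append(state)
    ship_acts.append(ship_actions)
    yard_acts.append(shipyard_actions)

X = np.stack(states)  # one state tensor per replay step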
Example #13
    def __call__(self, observation, configuration):
        board = Board(observation, configuration)
        self.set_board_actions(board)
        return board.current_player.next_actions
Example #14
    def get_single_ship_move(self, ship_id, pos, step_observation,
                             raw_observation, ship_simulated_step_memory):
        done = False

        board = Board(raw_observation, raw_configuration=self.configuration)
        observation = Observation(raw_observation)
        """
        ============
        Take Action
        ============
        """
        converted_observation = self.env.wrap_observation_for_ship_agent(
            obs=Observation(board.observation),
            player=board.observation['player'],
            spos=int(pos),
            uid=ship_id)
        state_vector = converted_observation.flatten()

        if self.ship_frame_stack_len > 1:
            multiframe_state = self.env.get_multiframe_ship_observation(
                ship_id)
            converted_obs = np.concatenate(multiframe_state, axis=0)
            state_vector = converted_obs.flatten()
        if len(self.env.get_multiframe_ship_observation(
                ship_id)) == self.ship_frame_stack_len:
            action = self.ship_agent.get_action(state_vector,
                                                step=self.step_number,
                                                game=self.episode_number)
        else:
            action = np.random.randint(0, 6)

        self.episode_actions.append(action)

        halite_action = self.env.convert_ship_action_to_halite_enum(
            action, observation)

        if halite_action and halite_action.name == halite_action.CONVERT.name and \
                observation.players[observation.player][0] < 500:
            # tried to convert without enough halite
            halite_action = None
            action = 5

        if halite_action:
            self.actions_for_step[ship_id] = halite_action.name

        # Take action
        try:
            obs_next: Observation = self.env.update_observation_for_ship(
                board, ship_id, halite_action)
        except KeyError as e:
            print('Actions taken')
            print(self.actions_for_step)
            print('Initial board and observation')
            print(step_observation.players[step_observation.player])
            raise e

        # the ship may no longer exist...
        # ie it collided with an enemy ship or converted to a shipyard, we need to use the previous
        # for now we will use the new position IF it exists, otherwise just use the old one
        # next_pos = obs_next.players[observation.player][2].get(ship_id, (None, None))[0]
        """
        ============
        Prepare for Model Update
        ============
        """

        # Update model
        if self.training:
            ship_simulated_step_memory[ship_id] = {
                'state': state_vector,
                'action': action,
                'pos': pos
            }
        action_string = halite_action.name if halite_action else 'None'

        if self.verbose and ((self.step_number % 10) == 0):
            print(
                f"Step {self.step_number}: Action taken {action} | {action_string} for ship {ship_id}, "
                f"reward received N/A | Player state {obs_next.players[observation.player]}"
            )
        # update current observation with the simulated step ahead
        raw_observation = obs_next

        return raw_observation
Example #15
    def get_single_shipyard_move(self, shipyard_id, pos, step_observation,
                                 raw_observation,
                                 shipyard_temporary_initial_memory):
        configuration = self.configuration
        board = Board(raw_observation, raw_configuration=configuration)
        observation = Observation(raw_observation)

        verbose = self.verbose
        done = False

        # Select action
        converted_observation, is_occupied = self.env.wrap_observation_for_shipyard_agent(
            obs=observation,
            player=observation.player,
            spos=pos,
            uid=shipyard_id)
        state_vector = converted_observation.flatten()
        state_vector: np.ndarray = np.append(state_vector, is_occupied)

        action = self.shipyard_agent.get_action(state_vector,
                                                step=self.step_number,
                                                game=self.episode_number)
        halite_action = self.env.convert_shipyard_action_to_halite_enum(
            action, shipyard_id, observation)
        self.episode_actions.append(halite_action)
        """
        ============
        Take Action
        ============
        """
        obs_next = self.env.update_observation_for_shipyard(
            board, shipyard_id, halite_action)

        reward = self.env.get_shipyard_reward(
            obs_next,
            self.env.wrap_observation_for_ship_agent(
                obs=obs_next,
                player=obs_next.player,
                spos=pos,  # because shipyards can't move
                uid=shipyard_id),
            uid=shipyard_id,
            done=done)

        self.episode_rewards.append(reward)
        """
        ============
        Prepare for Update Model
        ============
        """

        if self.training:
            shipyard_temporary_initial_memory[shipyard_id] = {
                'state': state_vector,
                'action': action,
                'pos': pos,
                'is_occupied': is_occupied
            }

        if verbose and ((self.step_number % 10) == 0):
            print(
                f"Step {self.step_number}: Action taken {action} for shipyard {shipyard_id}, "
                f"reward received {reward}")
        # update current observation with the simulated step ahead
        raw_observation = obs_next

        return raw_observation
Example #16
    def get_single_shipyard_move(
        self,
        shipyard_id,
        pos,
        step_observation,
        raw_observation,
        shipyard_temporary_initial_memory,
        step_number=0,
        episode_number=0,
    ):
        configuration = self.configuration
        board = Board(raw_observation, raw_configuration=configuration)
        observation = Observation(raw_observation)

        verbose = self.verbose
        done = False

        # Select action
        converted_observation = self.shipyard_state_wrapper.get_basic_single_frame_complete_observation(
            obs=observation,
            player=observation.player,
            sy_pos=pos,
            uid=shipyard_id)
        state_vector = converted_observation

        player_state = step_observation.players[step_observation.player]

        if len(player_state[2]) == 0 and player_state[0] > 500:
            action = 1
        else:
            action = self.shipyard_agent.get_action(state_vector,
                                                    step=step_number,
                                                    game=episode_number)
        halite_action = self.shipyard_state_wrapper.convert_action_to_enum(
            shipyard_id, observation, action)

        if halite_action:
            self.actions_for_step[shipyard_id] = halite_action.name
        """
        ============
        Take Action
        ============
        """
        obs_next = self.env.update_observation_for_shipyard(
            board, shipyard_id, halite_action)

        reward = self.env.get_shipyard_reward(
            obs_next,
            self.env.wrap_observation_for_shipyard_agent(
                obs=obs_next,
                player=obs_next.player,
                spos=pos,  # because shipyards can't move
                uid=shipyard_id),
            uid=shipyard_id,
            done=done)
        """
        ============
        Prepare for Update Model
        ============
        """

        is_occupied = state_vector[-2]

        if self.training:
            shipyard_temporary_initial_memory[shipyard_id] = {
                'state': state_vector,
                'action': action,
                'pos': pos,
                'is_occupied': is_occupied
            }

        if verbose and ((step_number % 10) == 0):
            print(
                f"Step {step_number}: Action taken {action} for shipyard {shipyard_id}, "
                f"reward received {reward}")
        # update current observation with the simulated step ahead
        raw_observation = obs_next

        return raw_observation
Example #17
    def get_actions(self, obs, config):
        """Main loop"""
        self.board = Board(obs, config)
        self.me = self.board.current_player
        me = self.me  # just for shorthand
        spawncount = 0
        self.refresh_ships()
        self.yardcount = len(self.me.shipyards)

        self.setup_stats()
        # Main ship loop - iterate until each ship has an action
        # TODO - ships on SY should go first
        self.action_iter = 0
        while len(LOG.set_actions) != len(me.ships):
            self.action_iter += 1
            if self.action_iter > 24:
                raise RuntimeError(
                    "action resolution iteration > 24 - probable infinite loop"
                )
            if self.action_iter % 10 == 0:
                print(f"Action Iter:{self.action_iter}")

            # If no yards, create and mark point
            p0sy = [
                sy for sy in self.board.shipyards.values() if sy.player_id == 0
            ]
            p0sy = p0sy[0].position if len(p0sy) > 0 else Point(5, 15)
            if len(self.me.shipyards) == 0 or any(
                [self.dist(s.position, p0sy) <= 2 for s in self.me.ships]):
                ship = self.get_best_ship_for_yard()
                ship.next_action = ShipAction.CONVERT
                ship.log.set_action = ShipAction.CONVERT
                # conversion is resolved before collision - we don't need to reserve point with log.set_point
                ship.log.p_point = None
                self.prospective_yard = Shipyard('PROSPECTIVE', ship.position,
                                                 self.me.id, self.board)

            # Calculate best potential actions
            for ship in [s for s in me.ships if s.log.set_action is None]:
                self.determine_ship_action(ship)

            # Confirm non-conflicting actions. Record set actions in ship log to keep track of
            # how many ships actions are finalized.
            p2s = LOG.p_point2ship  # point2ship map - excluding set ships
            for point, ships in p2s.items():
                if len(ships) == 1:  # Only contender - give action
                    ship = ships[0]
                    action, point = ship.log.p_action, ship.log.p_point
                    ship.next_action = action if action != 'WAIT' else None
                    ship.log.set_action, ship.log.set_point = action, point
                    # When ship action is calculated above, any set points should now not be possibilities.
                else:  # Give spot to highest priority ship (currently highest halite)
                    ships_by_halite = sorted([(s, s.halite) for s in ships],
                                             key=lambda x: -x[1])
                    priority_ship, halite = ships_by_halite[0]
                    action, point = priority_ship.log.p_action, priority_ship.log.p_point
                    priority_ship.next_action = action if action != 'WAIT' else None
                    priority_ship.log.set_action, priority_ship.log.set_point = action, point

        # Ship building
        h2ns = [(p.halite, len(p.ships)) for p in self.board.players.values()
                if p.id != me.id]
        nships_other = sorted(h2ns, key=lambda x: -x[0])[0][1]
        should_still_spawn = ((len(me.ships) <= nships_other) or (obs.step < 20)) \
                             and (obs.step < 360)
        reserve = config.convertCost if obs.step > 20 else 0
        for shipyard in me.shipyards:
            # Spawn if we can afford it (counting spawns already queued this
            # turn and a conversion reserve) and no ship has claimed the cell.
            have_enough_halite = (me.halite - spawncount * config.spawnCost -
                                  reserve) >= config.spawnCost
            no_ship_reserved_point = shipyard.position not in LOG.set_points
            if have_enough_halite and no_ship_reserved_point and should_still_spawn:
                shipyard.next_action = ShipyardAction.SPAWN
                spawncount += 1
        self.board_prev = self.board
        return me.next_actions
Example #18
def transform_observation(obs, config):
    board = Board(obs, config)
    me = board.current_player

    board_cells = sort_cells(board.cells)

    step = []
    cell_yield = []
    me_yard = []
    me_ship = []
    me_ship_cargo = []
    opp_yard = []
    opp_ship = []
    opp_ship_cargo = []

    for _, c in board_cells.items():
        step.append(obs['step'] / config.episodeSteps)

        cell_yield.append(c.halite / config.maxCellHalite)

        if c.ship is None:
            me_ship.append(0)
            me_ship_cargo.append(0)
            opp_ship.append(0)
            opp_ship_cargo.append(0)

        elif c.ship in me.ships:
            me_ship.append(1)
            me_ship_cargo.append(c.ship.halite / MAX_SHIP_HALITE)
            opp_ship.append(0)
            opp_ship_cargo.append(0)

        else:
            me_ship.append(0)
            me_ship_cargo.append(0)
            opp_ship.append(1)
            opp_ship_cargo.append(c.ship.halite / MAX_SHIP_HALITE)

        if c.shipyard is None:
            me_yard.append(0)
            opp_yard.append(0)

        elif c.shipyard in me.shipyards:
            me_yard.append(1)
            opp_yard.append(0)

        else:
            me_yard.append(0)
            opp_yard.append(1)

    x_obs = np.vstack((step,
                       cell_yield,
                       me_yard,
                       me_ship,
                       me_ship_cargo,
                       opp_yard,
                       opp_ship,
                       opp_ship_cargo))  # shape: (N_FEATURES, size * size)

    # Move features to the last axis; reshaping straight to
    # (size, size, N_FEATURES) would interleave feature planes across cells.
    x_obs = x_obs.reshape(N_FEATURES, config.size, config.size).transpose(1, 2, 0)
    x_obs = x_obs.astype(np.float32).clip(0, 1)

    return x_obs
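
A quick shape check for the encoder above, assuming sort_cells and the constants it uses (N_FEATURES = 8 feature planes, MAX_SHIP_HALITE as a cargo normalization) are defined:

from kaggle_environments import make

env = make("halite")
obs = env.reset(2)[0]["observation"]
x = transform_observation(obs, env.configuration)
assert x.shape == (env.configuration.size, env.configuration.size, N_FEATURES)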
Example #19
    def reset(self):
        """Reset trainer environment"""
        self.observation = self.trainer.reset()
        board = Board(self.observation, self.environment.configuration)
        return self.observation, board
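
A sketch of the wiring this reset method assumes; the class name is hypothetical, while the environment and trainer attribute names are taken from the snippet:

from kaggle_environments import make

class HaliteTrainerEnv:  # hypothetical enclosing class for reset() above
    def __init__(self):
        self.environment = make("halite")
        self.trainer = self.environment.train([None, "random"])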
Example #20
def play_episode(env: HaliteEnv,
                 ship_agent: Agent,
                 shipyard_agent: Agent,
                 configuration,
                 n_steps: int = 10,
                 verbose: bool = True,
                 training: bool = False,
                 simulated_step_learning: bool = False,
                 episode_number=0):
    episode_rewards = []
    episode_actions = []

    episode_scores = []

    raw_observation: dict = env.reset()[0].__dict__
    print('ep: {}'.format(episode_number))
    done = False

    for step_num in range(n_steps):
        if done:
            board = Board(raw_observation, raw_configuration=configuration)
            print('Done')
            print(board)
            return episode_scores

        actions_for_step = {}

        # won't change during the simulated sub-steps below
        step_observation = Observation(raw_observation)

        shipyard_temporary_initial_memory = {}
        ship_temporary_initial_memory = {}
        """
        ====================================
        ====================================
        SHIPYARDS
        ====================================
        ====================================
        """
        for shipyard_id, pos in step_observation.players[
                step_observation.player][1].items():
            # will change at each simulated step
            board = Board(raw_observation, raw_configuration=configuration)
            observation = Observation(raw_observation)

            # Select action
            converted_observation, is_occupied = env.wrap_observation_for_shipyard_agent(
                obs=observation,
                player=observation.player,
                spos=pos,
                uid=shipyard_id)
            state_vector = converted_observation.flatten()
            state_vector: np.ndarray = np.append(state_vector, is_occupied)

            action = shipyard_agent.get_action(state_vector,
                                               step=step_num,
                                               game=episode_number)
            halite_action = env.convert_shipyard_action_to_halite_enum(
                action, shipyard_id, observation)
            episode_actions.append(halite_action)

            # re-aligning action and halite action
            # TODO: should refactor
            if halite_action == ShipyardAction.SPAWN:
                action = 1
            else:
                action = 0

            if halite_action:
                actions_for_step[shipyard_id] = halite_action.name
            """
            ============
            Take Action
            ============
            """
            prev_obs = observation
            obs_next = env.update_observation_for_shipyard(
                board, shipyard_id, halite_action)

            reward = env.get_shipyard_reward(
                obs_next,
                env.wrap_observation_for_ship_agent(
                    obs=obs_next,
                    player=obs_next.player,
                    spos=pos,  # because shipyards can't move
                    uid=shipyard_id),
                uid=shipyard_id,
                done=done)

            episode_rewards.append(reward)
            """
            ============
            Update Model
            ============
            """

            converted_next_obs, is_occupied_next = env.wrap_observation_for_shipyard_agent(
                obs_next, obs_next.player, spos=pos, uid=shipyard_id)
            next_state_vector = converted_next_obs.flatten()
            next_state_vector: np.ndarray = np.append(next_state_vector,
                                                      is_occupied_next)

            if training:
                if simulated_step_learning:
                    shipyard_agent.remember(state=state_vector,
                                            action=action,
                                            reward=reward,
                                            new_state=next_state_vector,
                                            done=done)
                    shipyard_agent.learn(step_num=step_num,
                                         episode_num=episode_number)
                else:
                    shipyard_temporary_initial_memory[shipyard_id] = {
                        'state': state_vector,
                        'action': action,
                        'pos': pos,
                        'is_occupied': is_occupied
                    }

            if verbose and ((step_num % 5) == 0):
                print(
                    f"Step {step_num}: Action taken {action} for shipyard {shipyard_id}, "
                    f"reward received {reward}")
            # update current observation with the simulated step ahead
            raw_observation = obs_next
        """
        ====================================
        ====================================
        SHIPS
        ====================================
        ====================================
        """
        for ship_id, (pos, halite) in step_observation.players[
                step_observation.player][2].items():
            # will change at each simulated step
            board = Board(raw_observation, raw_configuration=configuration)
            observation = Observation(raw_observation)
            """
            ============
            Take Action
            ============
            """
            converted_observation = env.wrap_observation_for_ship_agent(
                obs=Observation(board.observation),
                player=board.observation['player'],
                spos=int(pos),
                uid=ship_id)
            state_vector = converted_observation.flatten()
            action = ship_agent.get_action(state_vector,
                                           step=step_num,
                                           game=episode_number)
            episode_actions.append(action)

            halite_action = env.convert_ship_action_to_halite_enum(
                action, observation)

            if halite_action and halite_action.name == halite_action.CONVERT.name and \
                observation.players[observation.player][0] < 500:
                # tried to convert without enough halite
                halite_action = None
                action = 5

            if halite_action:
                actions_for_step[ship_id] = halite_action.name

            # Take action
            prev_obs = observation
            try:
                obs_next: Observation = env.update_observation_for_ship(
                    board, ship_id, halite_action)
            except KeyError as e:
                print('Actions taken')
                print(actions_for_step)
                print('Current board and observation')
                print(board.ships.keys())
                print(observation.players[observation.player])
                print('Initial board and observation')
                print(step_observation.players[step_observation.player])
                raise e

            # the ship may no longer exist...
            # ie it collided with an enemy ship or converted to a shipyard, we need to use the previous
            # for now we will use the new position IF it exists, otherwise just use the old one
            next_pos = obs_next.players[observation.player][2].get(
                ship_id, (None, None))[0]

            if not next_pos:
                next_pos = int(pos)

            reward = env.get_collector_ship_reward(
                obs_next,
                env.wrap_observation_for_ship_agent(
                    obs=obs_next,
                    player=obs_next.player,
                    spos=pos,  # because shipyards can't move
                    uid=ship_id),
                ship_id,
                done=done)

            episode_rewards.append(reward)
            """
            ============
            Update Model
            ============
            """

            converted_next_obs = env.wrap_observation_for_ship_agent(
                obs=obs_next,
                player=obs_next.player,
                spos=next_pos,
                uid=ship_id)
            next_state_vector = converted_next_obs.flatten()

            # Update model
            if training:
                if simulated_step_learning:
                    ship_agent.remember(state=state_vector,
                                        action=action,
                                        reward=reward,
                                        new_state=next_state_vector,
                                        done=done)
                    ship_agent.learn(step_num=step_num,
                                     episode_num=episode_number)
                else:
                    ship_temporary_initial_memory[ship_id] = {
                        'state': state_vector,
                        'action': action,
                        'pos': pos
                    }
            action_string = halite_action.name if halite_action else 'None'

            if verbose and ((step_num % 5) == 0):
                print(
                    f"Step {step_num}: Action taken {action} | {action_string} for ship {ship_id}, "
                    f"reward received {reward}")
            # update current observation with the simulated step ahead
            raw_observation = obs_next
        """
        ================        
        ================
        == Take Step
        ================
        ================
        """

        # updates the env.observation
        step_results = env.step(actions=actions_for_step)

        print('Actions for step')
        print(actions_for_step)

        observation, game_reward, terminal = step_results

        if not simulated_step_learning:
            """
            Here we are doing learning after the actual "step" has taken place.

            This means that the earlier a ship or shipyard has selected its move, 
            the more unknowns and more "friendly reactions" that can occur afterwards.

            It would probably be very useful to include 
                - remaining_ship_actions
                - remaining_shipyard_actions
                - and potentially the current epsilon value
            as a part of the state.
            """

            player_halite = observation.players[observation.player][0]
            opponent_halites = [
                item[0] for i, item in enumerate(observation.players)
                if i != observation.player
            ]
            best_opponent_halite = sorted(opponent_halites, reverse=True)[0]

            for ship_id, val in ship_temporary_initial_memory.items():
                s = val['state']
                a = val['action']
                pos = val['pos']
                converted_next_obs = env.wrap_observation_for_ship_agent(
                    obs=Observation(observation),
                    player=observation['player'],
                    spos=int(pos),
                    uid=ship_id)
                ship_reward = env.get_collector_ship_reward(
                    observation=observation,
                    converted_observation=converted_next_obs,
                    uid=ship_id,
                    done=done)
                next_state_vector = converted_next_obs.flatten()
                ship_agent.remember(state=s,
                                    action=a,
                                    reward=ship_reward,
                                    new_state=next_state_vector,
                                    done=done)
                ship_agent.learn(step_num=step_num, episode_num=episode_number)

            for shipyard_id, val in shipyard_temporary_initial_memory.items():
                s = val['state']
                a = val['action']
                pos = val['pos']
                is_occupied = val['is_occupied']
                converted_next_obs, is_occupied_next = env.wrap_observation_for_shipyard_agent(
                    obs=Observation(observation),
                    player=observation['player'],
                    spos=int(pos),
                    uid=shipyard_id)
                print('For action: {}'.format(a))
                shipyard_reward = env.get_shipyard_count_reward(
                    observation=observation,
                    converted_observation=converted_next_obs)
                next_state_vector = converted_next_obs.flatten()
                next_state_vector: np.ndarray = np.append(
                    next_state_vector, is_occupied_next)

                shipyard_agent.remember(state=s,
                                        action=a,
                                        reward=shipyard_reward,
                                        new_state=next_state_vector,
                                        done=done)
                shipyard_agent.learn(step_num=step_num,
                                     episode_num=episode_number)

        episode_scores.append([item[0] for item in observation['players']])
        raw_observation = observation

    return episode_scores
Example #21
    def _initialize_new_board(self):
        env = make("halite", self._configuration)
        obs = env.reset(self._num_players)
        return Board(raw_observation=obs[0]["observation"], raw_configuration=self._configuration)