Ejemplo n.º 1
0
    def update_grid_policy_ace(self, algorithm: TabularAlgorithm, usable_ace: bool):
        """Copy the target policy (and, when available, Q) for blackjack states
        with the given usable_ace flag into the grid display.

        Grid x = dealer's card, grid y = player's sum (each offset by its
        minimum), following the table layout in the book.
        """
        policy: TabularPolicy = algorithm.target_policy
        # policy_: policy.Deterministic
        for s, state in enumerate(self.states):
            if not state.is_terminal and state.usable_ace == usable_ace:
                # dealer_card is x, player_sum is y : following the table in the book
                x = state.dealers_card - self.dealers_card_min
                y = state.player_sum - self.player_sum_min
                position: common.XY = common.XY(x, y)
                action: Action = policy.get_action(s)   # type: ignore
                # displayed policy value: 1 = hit, 0 = stick
                policy_value: int = int(action.hit)
                # print(position, transfer_1_to_2)
                self.grid_world.set_policy_value(
                    position=position,
                    policy_value=policy_value,
                )

                # when the algorithm tracks Q, also display per-action q values
                if algorithm.Q:
                    # NOTE(review): `policy[s]` vs `policy.get_action(s)` above —
                    # presumably this returns the action *index*; confirm the
                    # TabularPolicy indexing API
                    policy_a: int = policy[s]
                    is_terminal: bool = self.is_terminal[s]
                    # NOTE(review): this loop rebinds `action` and `y` from the
                    # code above; safe as written but fragile under reordering
                    for a, action in enumerate(self.actions):
                        if self.s_a_compatibility[s, a]:
                            is_policy: bool = (not is_terminal and policy_a == a)
                            # show q above the square centre (y=+1) for hit,
                            # below (y=-1) for stick
                            if action.hit:
                                y = 1
                            else:
                                y = -1
                            move: common.XY = common.XY(0, y)
                            self.grid_world.set_move_q_value(
                                position=position,
                                move=move,
                                q_value=algorithm.Q[s, a],
                                is_policy=is_policy
                            )
Ejemplo n.º 2
0
def q_test() -> bool:
    """Exercise StateActionFunction get, set and in-place ops on the cliff
    environment; prints intermediate values and returns True on completion."""
    params = EnvironmentParameters(
        environment_type=common.EnvironmentType.CLIFF,
        actions_list=common.ActionsList.FOUR_MOVES,
    )
    env = Environment(params)
    env.build()
    q = state_action_function.StateActionFunction(env, initial_value=-7.0)

    probe_state = State(is_terminal=False, position=common.XY(x=4, y=2))
    s = env.state_index[probe_state]
    print(f"state_.index {s}")

    probe_action = Action(common.XY(x=1, y=0))
    a = env.action_index[probe_action]
    print(f"action_.index {a}")

    print(q[s, a])
    q[s, a] = 2.0
    q[s, a] += 0.5
    print(q[s, a])

    # noinspection PyProtectedMember
    print(f"Q: {q.matrix}")

    return True
Ejemplo n.º 3
0
def cliff_test() -> bool:
    """Smoke-test the cliff Environment: dump the state and action index
    tables, then run two hand-picked transitions and print the observations."""
    environment = Environment(EnvironmentParameters())
    environment.build()
    print(type(environment))

    for state_ in environment.states:
        print(f"{state_} \t index={environment.state_index[state_]}")

    print()

    for action_ in environment.actions:
        print(f"{action_} \t index={environment.action_index[action_]}")

    print()

    probes = [
        (state.State(is_terminal=False, position=common.XY(x=4, y=2)),
         action.Action(common.XY(x=1, y=0))),
        (state.State(is_terminal=False, position=common.XY(x=6, y=1)),
         action.Action(common.XY(x=0, y=-1))),
    ]
    for state_, action_ in probes:
        observation_ = environment.from_state_perform_action(state_, action_)
        print(state_, action_)
        print(observation_)

    return True
Ejemplo n.º 4
0
def _four_cliff_friendly_moves() -> list[Action]:
    """Return the four compass moves in the order: right, up, left, down."""
    deltas = [(+1, 0), (0, +1), (-1, 0), (0, -1)]
    return [Action(move=common.XY(dx, dy)) for dx, dy in deltas]
Ejemplo n.º 5
0
    def _build_actions(self):
        """Build the action list: the zero acceleration first, then every other
        (ax, ay) combination in the allowed range.

        The (0, 0) action must be appended first: it is the default picked by
        e-greedy tie-breaking, without which an episode may never terminate.
        """
        new_action: Action = Action(acceleration=common.XY(x=0, y=0))
        self.actions.append(new_action)

        for ax in range(self._min_ax, self._max_ax + 1):
            for ay in range(self._min_ay, self._max_ay + 1):
                # skip only the (0, 0) action already added above; the original
                # condition `ax != 0 and ay != 0` wrongly dropped every
                # axis-aligned acceleration such as (1, 0) or (0, -1)
                if ax != 0 or ay != 0:
                    new_action = Action(
                        acceleration=common.XY(x=ax, y=ay))
                    self.actions.append(new_action)
Ejemplo n.º 6
0
 def _build_states(self):
     """set S: enumerate every (position, velocity) combination on the grid;
     positions at the goal are terminal."""
     for x in range(self.grid_world.max_x + 1):
         for y in range(self.grid_world.max_y + 1):
             position = common.XY(x=x, y=y)
             at_goal: bool = self.grid_world.is_at_goal(position)
             for vx in range(self._min_vx, self._max_vx + 1):
                 for vy in range(self._min_vy, self._max_vy + 1):
                     self.states.append(
                         State(is_terminal=at_goal,
                               position=position,
                               velocity=common.XY(x=vx, y=vy)))
Ejemplo n.º 7
0
 def change_request(self, current_position: common.XY,
                    move: Optional[common.XY]) -> common.XY:
     """Apply a random movement from the current position, clamped onto the
     grid.

     NOTE(review): the passed `move` is overwritten and never used —
     presumably deliberate for this random-walk environment; confirm.
     """
     move = self._get_random_movement()
     target = common.XY(x=current_position.x + move.x,
                        y=current_position.y + move.y)
     # project back to grid if outside
     return self.project_back_to_grid(target)
Ejemplo n.º 8
0
 def _load_gridworld(self):
     """Redraw the whole grid onto the grid surface and cache it as the
     background."""
     self._set_sizes()
     self._grid_surface.fill(self._background_color)
     for col in range(self._max_x + 1):
         for row in range(self._max_y + 1):
             self._draw_square(self._grid_surface,
                               common.XY(col, row),
                               draw_background=True)
     self._copy_grid_into_background()
Ejemplo n.º 9
0
    def change_request(self, position: common.XY, velocity: common.XY, acceleration: common.XY)\
            -> tuple[common.XY, common.XY]:
        """Apply the acceleration unless the car skids, then advance the
        position by the (possibly unchanged) velocity.

        With probability skid_probability the acceleration is ignored and the
        velocity is kept as-is.
        """
        skidded: bool = utils.uniform() <= self.skid_probability
        if skidded:
            new_velocity = velocity
        else:
            new_velocity = common.XY(x=velocity.x + acceleration.x,
                                     y=velocity.y + acceleration.y)

        new_position = common.XY(x=position.x + new_velocity.x,
                                 y=position.y + new_velocity.y)
        # note: the position is deliberately not projected back onto the grid
        # here (see the commented-out call in the original)
        return new_position, new_velocity
Ejemplo n.º 10
0
 def change_request(self, current_position: common.XY,
                    move: common.XY) -> common.XY:
     """Apply the requested move plus the column's wind, clamped back onto the
     grid."""
     wind = self._get_wind(current_position)
     target = common.XY(x=current_position.x + move.x + wind.x,
                        y=current_position.y + move.y + wind.y)
     # project back to grid if outside
     return self.project_back_to_grid(target)
Ejemplo n.º 11
0
def _kings_moves(include_center: bool = False) -> list[Action]:
    """Return the eight king's moves, optionally also the (0, 0) stay-put
    move when include_center is True."""
    return [Action(move=common.XY(x, y))
            for x in (-1, 0, 1)
            for y in (-1, 0, 1)
            if include_center or x != 0 or y != 0]
Ejemplo n.º 12
0
    def _get_wind(self, current_position: common.XY) -> common.XY:
        """Return the wind vector for the given column.

        The base upward wind is looked up by the x coordinate; when random
        wind is enabled a random offset drawn from the distribution is added.
        """
        extra_wind: int = (self.random_wind_distribution.draw_one()
                           if self.random_wind else 0)
        total_upward = self.upward_wind[current_position.x] + extra_wind
        return common.XY(x=0, y=total_upward)
Ejemplo n.º 13
0
 def get_start_states(self) -> list[State]:
     """Return one zero-velocity, non-terminal State per start position on
     the grid."""
     zero_velocity = common.XY(x=0, y=0)
     return [State(is_terminal=False,
                   position=p,
                   velocity=zero_velocity)
             for p in self._grid_world.get_start_positions()]
Ejemplo n.º 14
0
 def change_request(self, current_position: common.XY, move: Optional[common.XY]) -> common.XY:
     """Apply move to current_position (None means stay put), clamped back
     onto the grid."""
     if move is None:
         requested = current_position
     else:
         requested = common.XY(x=current_position.x + move.x,
                               y=current_position.y + move.y)
     # project back to grid if outside
     return self.project_back_to_grid(requested)
Ejemplo n.º 15
0
 def _build_states(self):
     """set S: one state per grid position; goal squares are terminal."""
     for x in range(self.grid_world.max_x + 1):
         for y in range(self.grid_world.max_y + 1):
             xy = common.XY(x=x, y=y)
             self.states.append(State(
                 position=xy,
                 is_terminal=self.grid_world.is_at_goal(xy),
             ))
Ejemplo n.º 16
0
 def project_back_to_grid(self, requested_position: common.XY) -> common.XY:
     """Clamp a position onto the grid: each coordinate into [0, max]."""
     clamped_x = min(max(requested_position.x, 0), self.max_x)
     clamped_y = min(max(requested_position.y, 0), self.max_y)
     return common.XY(x=clamped_x, y=clamped_y)
Ejemplo n.º 17
0
 def update_grid_policy(self, policy: TabularPolicy):
     """Write each state's policy action (cars transferred 1 -> 2) into the
     grid world display."""
     for s, state in enumerate(self.states):
         # axes reversed to match the layout in the book
         position = common.XY(x=state.ending_cars_2, y=state.ending_cars_1)
         chosen: Action = policy.get_action(s)  # type: ignore
         self.grid_world.set_policy_value(
             position=position,
             policy_value=chosen.transfer_1_to_2,
         )
Ejemplo n.º 18
0
 def _load_gridworld(self):
     """Redraw every square (including its state value when _display_v is
     set) and cache the surface as the background."""
     self._set_sizes()
     self._grid_surface.fill(self._background_color)
     for x in range(self._max_x + 1):
         for y in range(self._max_y + 1):
             square = self._grid_world.get_square(position=common.XY(x, y))
             if self._display_v:
                 # v is indexed [row, col], i.e. [y, x]
                 self._draw_square(x, y, square, self._grid_surface,
                                   v=self._grid_world.v[y, x])
             else:
                 self._draw_square(x, y, square, self._grid_surface)
     self._copy_grid_into_background()
Ejemplo n.º 19
0
    def _draw_policy(self, surface: pygame.Surface, rect: pygame.Rect,
                     output_square: common.OutputSquare):
        """Fill the square with the policy colour and label it Hit or Stick;
        squares with no policy value are left untouched."""
        policy_value = output_square.policy_value
        if policy_value is None:
            return

        fill_color: pygame.Color = self._get_policy_value_color(policy_value)
        pygame.draw.rect(surface, fill_color, rect)

        label = "Hit" if policy_value == 1 else "Stick"
        center_rect = self._get_sub_rect(rect, move=common.XY(x=0, y=0))
        self._center_text(surface, center_rect, label)
Ejemplo n.º 20
0
def racetrack_test() -> bool:
    """Smoke-test the racetrack Environment: dump the index tables, then run
    three hand-picked (state, action) transitions and print the responses."""
    environment = Environment(EnvironmentParameters(grid=grids.TRACK_1))
    environment.build()

    for state_ in environment.states:
        print(f"{state_} \t index={environment.state_index[state_]}")

    print()

    for action_ in environment.actions:
        print(f"{action_} \t index={environment.action_index[action_]}")

    print()

    probes = [
        (state.State(is_terminal=False,
                     position=common.XY(x=4, y=0),
                     velocity=common.XY(x=0, y=1)),
         action.Action(acceleration=common.XY(x=1, y=0))),
        (state.State(is_terminal=False,
                     position=common.XY(x=5, y=4),
                     velocity=common.XY(x=1, y=0)),
         action.Action(acceleration=common.XY(x=0, y=0))),
        (state.State(is_terminal=False,
                     position=common.XY(x=0, y=0),
                     velocity=common.XY(x=0, y=3)),
         action.Action(common.XY(x=0, y=-1))),
    ]
    for state_, action_ in probes:
        response_ = environment.from_state_perform_action(state_, action_)
        print(state_, action_)
        print(response_)

    return True
Ejemplo n.º 21
0
    def draw_response(self, state: State,
                      action: Action) -> tuple[float, State]:
        """Draw a single outcome for a single state and action (standard call
        for episodic algorithms).

        Reward is 1.0 when the next position is (max_x, 0), else 0.0.
        """
        self._draw_next_state(state, action)

        rewarded_position = common.XY(x=self._grid_world.max_x, y=0)
        reward = 1.0 if self._next_state.position == rewarded_position else 0.0

        return reward, self._next_state
Ejemplo n.º 22
0
def grid_test() -> bool:
    """Dump the grid world as a cartesian numpy int array of square values."""
    environment = Environment(EnvironmentParameters(
        actions_list=common.ActionsList.FOUR_MOVES))
    grid_world_ = environment.grid_world
    rows, cols = grid_world_.max_y + 1, grid_world_.max_x + 1
    cartesian_grid = np.empty(shape=(rows, cols), dtype=int)
    # noinspection PyTypeChecker
    for y, x in np.ndindex(cartesian_grid.shape):
        cartesian_grid[y, x] = grid_world_.get_square(common.XY(x, y))

    print(cartesian_grid)
    return True
Ejemplo n.º 23
0
 def _draw_frame_on_background(self,
                               agent_position: Optional[common.XY] = None,
                               agent_move: Optional[common.XY] = None,
                               prev_position: Optional[common.XY] = None,
                               prev_move: Optional[common.XY] = None
                               ):
     """Draw one frame onto the background: every square, with the agent and
     the previous agent square drawn specially when supplied."""
     for x in range(self._max_x + 1):
         for y in range(self._max_y + 1):
             here = common.XY(x, y)
             if here == agent_position:
                 self._draw_agent_on_background(agent_position, agent_move)
             elif here == prev_position:
                 self._draw_prev_on_background(prev_position, prev_move)
             else:
                 self._draw_square(surface=self._background, position=here)
Ejemplo n.º 24
0
def random_walk_test() -> bool:
    """Smoke-test the random-walk Environment: dump the index tables, then run
    three hand-picked (state, action) transitions and print the observations."""
    environment = Environment(EnvironmentParameters(
        actions_list=common.ActionsList.NO_ACTIONS
    ))
    environment.build()

    for state_ in environment.states:
        print(f"{state_} \t index={environment.state_index[state_]}")

    print()

    for action_ in environment.actions:
        print(f"{action_} \t index={environment.action_index[action_]}")

    print()

    probes = [
        (common.XY(x=4, y=0), common.XY(x=1, y=0)),
        (common.XY(x=5, y=0), common.XY(x=1, y=0)),
        (common.XY(x=0, y=0), common.XY(x=-1, y=0)),
    ]
    for position, move in probes:
        state_ = state.State(is_terminal=False, position=position)
        action_ = action.Action(move)
        observation_ = environment.from_state_perform_action(state_, action_)
        print(state_, action_)
        print(observation_)

    return True
Ejemplo n.º 25
0
 def _draw_policy(self, surface: pygame.Surface, rect: pygame.Rect, output_square: common.OutputSquare):
     """Render the square's policy value (one decimal place) centred in the
     square; squares with no policy value are left untouched."""
     policy_value = output_square.policy_value
     if policy_value is None:
         return
     centre = self._get_sub_rect(rect, move=common.XY(x=0, y=0))
     self._center_text(surface, centre, f"{policy_value:.1f}")
Ejemplo n.º 26
0
 def _get_random_movement(self) -> common.XY:
     """Draw a random horizontal step; movement is along x only (y is 0)."""
     step: int = self._random_move_distribution.draw_one()
     return common.XY(x=step, y=0)
Ejemplo n.º 27
0
    def _draw_square(self,
                     x: int,
                     y: int,
                     square: common.Square,
                     surface: pygame.Surface,
                     v: Optional[float] = None) -> pygame.Rect:
        """Draw the square at cartesian (x, y) onto surface and return its rect.

        The surface is addressed top-down, so the cartesian y is flipped into
        a screen row. The "12.3" label is a placeholder left in while text
        rendering is worked out; `v` is accepted but not yet rendered (TODO).
        """
        # flip cartesian y into a screen row (row 0 is the top of the surface)
        row = self._max_y - y
        col = x

        color: pygame.Color = self._color_lookup[square]
        left: int = col * self._cell_pixels
        top: int = row * self._cell_pixels
        # -1 leaves a one-pixel grid line between adjacent squares
        width: int = self._cell_pixels - 1
        height: int = self._cell_pixels - 1

        # pygame.Rect doesn't like named parameters
        square_rect: pygame.Rect = pygame.Rect(left, top, width, height)
        pygame.draw.rect(surface, color, square_rect)

        # placeholder text, drawn in the top-left sub-cell (move (-1, 1))
        text: str = "12.3"
        move: common.XY = common.XY(x=-1, y=1)
        sub_rect = self._get_sub_rect(square_rect, move)
        self._center_text(surface, sub_rect, text)

        if v is not None:
            # TODO: write v in square_rect
            pass
        return square_rect
Ejemplo n.º 28
0
 def get_start_positions(self) -> list[common.XY]:
     """Return the (flipped) cartesian position of every START square."""
     start_mask: np.ndarray = (self._grid[:, :] == common.Square.START)
     flat_indices: np.ndarray = np.flatnonzero(start_mask)
     iy, ix = np.unravel_index(flat_indices, shape=self._grid.shape)
     return [self._position_flip(common.XY(px, py))
             for px, py in zip(ix, iy)]
Ejemplo n.º 29
0
 def _move_flip(self, xy_in: common.XY) -> common.XY:
     """Return the move with its y component negated (cartesian <-> screen)."""
     flipped = common.XY(x=xy_in.x, y=-xy_in.y)
     return flipped
Ejemplo n.º 30
0
 def _position_flip(self, xy_in: common.XY) -> common.XY:
     """Return the position mirrored vertically about the grid (y -> max_y - y)."""
     mirrored_y = self.max_y - xy_in.y
     return common.XY(x=xy_in.x, y=mirrored_y)