Example #1
0
    def get_state_function_graph(self,
                                 usable_ace: bool) -> common.Graph3DValues:
        """Build the 3D state-value surface for one setting of ``usable_ace``.

        For every (player_sum, dealers_card) pair listed by the environment,
        a non-terminal ``State`` is built, looked up in
        ``environment.state_index`` and its value read from ``algorithm.V``.

        Args:
            usable_ace: Value placed in every ``State`` that is looked up;
                also selects the graph title.

        Returns:
            A shallow copy of ``self._comparison.graph3d_values`` with its
            title and x/y/z series replaced. The shared template itself is
            not rebound.
        """
        env: Environment = self.environment
        # Offsets that turn a card / sum into a zero-based array position.
        # Assumes player_sums and dealers_cards are contiguous runs starting
        # at these minimums -- TODO confirm against Environment.
        first_sum = env.player_sum_min
        first_card = env.dealers_card_min

        player_axis = np.array(env.player_sums, dtype=int)
        dealer_axis = np.array(env.dealers_cards, dtype=int)
        # Rows follow the dealer's card, columns follow the player's sum.
        surface = np.empty(shape=dealer_axis.shape + player_axis.shape,
                           dtype=float)

        for player_sum in env.player_sums:
            column = player_sum - first_sum
            for dealers_card in env.dealers_cards:
                lookup_key: State = State(
                    is_terminal=False,
                    player_sum=player_sum,
                    usable_ace=usable_ace,
                    dealers_card=dealers_card,
                )
                row = dealers_card - first_card
                surface[row, column] = self.algorithm.V[env.state_index[lookup_key]]

        # Copy so that setting the title here does not overwrite the template.
        g: common.Graph3DValues = copy.copy(self._comparison.graph3d_values)
        g.title = "Usable Ace" if usable_ace else "No usable Ace"
        g.x_series = common.Series(title=g.x_label, values=player_axis)
        g.y_series = common.Series(title=g.y_label, values=dealer_axis)
        g.z_series = common.Series(title=g.z_label, values=surface)
        return g
Example #2
0
    def get_policy_graph_values(self) -> common.Graph2DValues:
        """Build the 2D graph of the target policy's stake per capital level.

        Every non-terminal state of the environment contributes one point:
        x is ``state.capital`` and y is the ``stake`` of the action returned
        by the target policy for that state's index.

        Returns:
            A new ``Graph2DValues`` with title, axis labels, axis limits and
            the x / graph series filled in.
        """
        policy: TabularPolicy = self.algorithm.target_policy

        capitals: list[int] = []
        stakes: list[float] = []
        for s, state in enumerate(self.environment.states):
            if state.is_terminal:
                continue
            capitals.append(state.capital)
            action: Action = policy.get_action(s)  # type: ignore
            stakes.append(float(action.stake))
        x_values = np.array(capitals, dtype=int)
        y_values = np.array(stakes, dtype=float)

        g: common.Graph2DValues = common.Graph2DValues()
        g.title = "Policy"
        # Labels must be assigned before the series are built: the series
        # titles are read from them, and reading first would pick up the
        # constructor defaults instead of "Capital" / "Stake".
        g.x_label = "Capital"
        g.y_label = "Stake"
        g.x_series = common.Series(title=g.x_label, values=x_values)
        g.graph_series = [common.Series(title=g.y_label, values=y_values)]
        g.x_min = 0.0
        g.x_max = 100.0
        g.y_min = 0.0
        g.y_max = None
        g.has_grid = True
        g.has_legend = False
        return g
Example #3
0
    def compile(self):
        """Collate recorded per-episode values into plottable series.

        Sets ``self.x_series`` to the episode numbers accepted by
        ``self._is_record_episode`` and appends one ``Series`` per entry of
        ``self.comparison.settings_list`` to ``self.series_list``.
        """
        cfg = self.comparison.comparison_settings
        first_recorded = cfg.episode_to_start_recording
        every = cfg.episode_recording_frequency

        # Episodes are numbered from 1 up to and including training_episodes.
        recorded_episodes = []
        for episode in range(1, cfg.training_episodes + 1):
            if self._is_record_episode(episode, first_recorded, every):
                recorded_episodes.append(episode)
        episode_array = np.array(recorded_episodes, dtype=float)

        self.x_series = common.Series(title="Episode", values=episode_array)

        # Pull each algorithm's recorded values out of the recorder, keyed by
        # (algorithm_parameters, episode).
        for settings in self.comparison.settings_list:
            parameters = settings.algorithm_parameters
            recorded = [
                self._recorder[parameters, episode]
                for episode in episode_array
            ]
            values = np.array(recorded, dtype=float)
            algorithm_type: common.AlgorithmType = parameters.algorithm_type
            title: str = self._trainer.algorithm_factory.get_algorithm_title(
                settings.algorithm_parameters)
            self.series_list.append(
                common.Series(
                    title=title,
                    identifiers={"algorithm_type": algorithm_type},
                    values=values,
                )
            )
    def compile(self):
        """Collate recorded per-timestep values into plottable series.

        Determines ``self._max_timestep``, sets ``self.x_series`` to the
        timesteps ``0 .. _max_timestep`` inclusive, and appends one ``Series``
        per entry of ``self.comparison.settings_list`` to ``self.series_list``.
        """
        reported_max = self._trainer.max_cum_timestep
        if reported_max > 0:
            # Serial case: the trainer reports the maximum directly.
            self._max_timestep = reported_max
        else:
            # Parallel case: the trainer did not report a maximum, so deduce
            # it from the second element of each recorder tally key.
            self._max_timestep = max(
                key[1] for key in self._recorder.tallies.keys()
            )
        timestep_array = np.arange(self._max_timestep + 1, dtype=int)

        self.x_series = common.Series(title="Timestep", values=timestep_array)

        # Pull each algorithm's recorded values out of the recorder, keyed by
        # (algorithm_type, timestep).
        for settings in self.comparison.settings_list:
            parameters = settings.algorithm_parameters
            algorithm_type: common.AlgorithmType = parameters.algorithm_type
            recorded = [
                self._recorder[algorithm_type, timestep]
                for timestep in timestep_array
            ]
            values = np.array(recorded, dtype=float)
            title: str = self._trainer.algorithm_factory.get_algorithm_title(
                settings.algorithm_parameters)
            self.series_list.append(
                common.Series(
                    title=title,
                    identifiers={"algorithm_type": algorithm_type},
                    values=values,
                )
            )
Example #5
0
 def compile(self):
     """Collate recorded per-alpha values into plottable series.

     Sets ``self.x_series`` to the alpha values in
     ``self.breakdown_parameters.alpha_list`` and appends one ``Series`` per
     algorithm type in ``algorithm_type_list`` to ``self.series_list``.
     """
     parameters = self.breakdown_parameters
     self.x_series = common.Series(title="α", values=np.array(parameters.alpha_list))

     # Pull each algorithm's recorded values out of the recorder, keyed by
     # (algorithm_type, alpha).
     for algorithm_type in parameters.algorithm_type_list:
         recorded = [
             self._recorder[algorithm_type, alpha]
             for alpha in parameters.alpha_list
         ]
         label = self._trainer.algorithm_factory.get_algorithm_name(algorithm_type)
         self.series_list.append(
             common.Series(
                 title=label,
                 values=np.array(recorded, dtype=float),
                 identifiers={"algorithm_type": algorithm_type},
             )
         )
Example #6
0
    def get_state_graph3d_values(self) -> common.Graph3DValues:
        """Build the 3D state-value surface over the two car counts.

        For every pair ``(cars1, cars2)`` in ``0 .. max_cars`` a non-terminal
        ``State`` is built, looked up in ``environment.state_index`` and its
        value read from ``algorithm.V``.

        Returns:
            ``self._comparison.graph3d_values`` with its x/y/z series
            replaced. NOTE: that object is updated in place (no copy is
            taken), so the comparison's own graph values carry the new
            series after this call.
        """
        axis_length: int = self.environment.max_cars + 1
        x_values = np.arange(axis_length, dtype=float)
        y_values = np.arange(axis_length, dtype=float)
        # Rows follow the second location's cars, columns the first's.
        surface = np.empty(shape=(axis_length, axis_length), dtype=float)

        for first_count in range(axis_length):
            for second_count in range(axis_length):
                lookup_key: State = State(
                    ending_cars_1=first_count,
                    ending_cars_2=second_count,
                    is_terminal=False,
                )
                index: int = self.environment.state_index[lookup_key]
                surface[second_count, first_count] = self.algorithm.V[index]

        g: common.Graph3DValues = self._comparison.graph3d_values
        g.x_series = common.Series(title=g.x_label, values=x_values)
        g.y_series = common.Series(title=g.y_label, values=y_values)
        g.z_series = common.Series(title=g.z_label, values=surface)
        return g
Example #7
0
    def get_state_graph_values(self) -> common.Graph2DValues:
        """Build the 2D graph of the state value V(s) per capital level.

        Every non-terminal state of the environment contributes one point:
        x is ``state.capital`` and y is ``algorithm.V`` at that state's index.

        Returns:
            A new ``Graph2DValues`` with title, axis labels, axis limits and
            the x / graph series filled in.
        """
        capitals: list[int] = []
        state_values: list[float] = []
        for s, state in enumerate(self.environment.states):
            if state.is_terminal:
                continue
            capitals.append(state.capital)
            state_values.append(self.algorithm.V[s])
        x_values = np.array(capitals, dtype=int)
        y_values = np.array(state_values, dtype=float)

        g: common.Graph2DValues = common.Graph2DValues()
        g.title = "V(s)"
        # Labels must be assigned before the series are built: the series
        # titles are read from them, and reading first would pick up the
        # constructor defaults instead of "Capital" / "V(s)".
        g.x_label = "Capital"
        g.y_label = "V(s)"
        g.x_series = common.Series(title=g.x_label, values=x_values)
        g.graph_series = [common.Series(title=g.y_label, values=y_values)]
        g.x_min = 0.0
        g.x_max = 100.0
        g.y_min = 0.0
        g.y_max = 1.0
        g.has_grid = True
        g.has_legend = False
        return g