def get_state_function_graph(self, usable_ace: bool) -> common.Graph3DValues:
    """Build the 3D state-value surface for one usable-ace setting.

    Each (player sum, dealer's card) pair of the environment is turned into
    a non-terminal ``State``; its index is looked up in
    ``environment.state_index`` and the matching entry of
    ``self.algorithm.V`` becomes the height of the surface at that point.

    Args:
        usable_ace: Value of ``usable_ace`` given to every plotted state.
            It also selects the graph title.

    Returns:
        A shallow copy of ``self._comparison.graph3d_values`` with its title
        and its x, y and z series filled in.
    """
    environment: Environment = self.environment
    player_axis = np.array(environment.player_sums, dtype=int)
    dealer_axis = np.array(environment.dealers_cards, dtype=int)
    # Rows are indexed by the dealer's card (y), columns by the player's sum (x).
    surface = np.empty(shape=dealer_axis.shape + player_axis.shape, dtype=float)

    for player_sum in environment.player_sums:
        column = player_sum - environment.player_sum_min
        for dealers_card in environment.dealers_cards:
            row = dealers_card - environment.dealers_card_min
            state: State = State(
                is_terminal=False,
                player_sum=player_sum,
                usable_ace=usable_ace,
                dealers_card=dealers_card,
            )
            state_number = environment.state_index[state]
            surface[row, column] = self.algorithm.V[state_number]

    # Copy so the comparison's own graph settings are left untouched.
    graph: common.Graph3DValues = copy.copy(self._comparison.graph3d_values)
    graph.title = "Usable Ace" if usable_ace else "No usable Ace"
    graph.x_series = common.Series(title=graph.x_label, values=player_axis)
    graph.y_series = common.Series(title=graph.y_label, values=dealer_axis)
    graph.z_series = common.Series(title=graph.z_label, values=surface)
    return graph
def get_policy_graph_values(self) -> common.Graph2DValues:
    """Build the 2D graph of the target policy's stake against capital.

    For every non-terminal state of the environment, the state's capital is
    the x value and the stake of the action chosen by
    ``self.algorithm.target_policy`` for that state is the y value.

    Returns:
        A new ``common.Graph2DValues`` titled "Policy" holding one x series
        and one graph series, with fixed axis limits and display flags.
    """
    policy: TabularPolicy = self.algorithm.target_policy
    capitals: list[int] = []
    stakes: list[float] = []
    for s, state in enumerate(self.environment.states):
        if state.is_terminal:
            continue
        capitals.append(state.capital)
        action: Action = policy.get_action(s)  # type: ignore
        stakes.append(float(action.stake))

    x_values = np.array(capitals, dtype=int)
    y_values = np.array(stakes, dtype=float)

    g: common.Graph2DValues = common.Graph2DValues()
    # Labels must be assigned before the series are built: the series titles
    # are read from them. Previously the series were created first and so
    # picked up the defaults of a fresh Graph2DValues instead.
    g.title = "Policy"
    g.x_label = "Capital"
    g.y_label = "Stake"
    g.x_series = common.Series(title=g.x_label, values=x_values)
    g.graph_series = [common.Series(title=g.y_label, values=y_values)]
    g.x_min = 0.0
    g.x_max = 100.0
    g.y_min = 0.0
    g.y_max = None
    g.has_grid = True
    g.has_legend = False
    return g
def compile(self):
    """Collate the recorded per-episode values into one series per settings entry.

    Sets ``self.x_series`` to the episodes selected by
    ``self._is_record_episode`` (out of 1..training_episodes), then appends
    to ``self.series_list`` one ``common.Series`` per entry of
    ``self.comparison.settings_list`` with the values read from
    ``self._recorder``.
    """
    comparison_settings = self.comparison.comparison_settings
    first_recorded = comparison_settings.episode_to_start_recording
    recording_step = comparison_settings.episode_recording_frequency

    recorded_episodes = []
    for episode in range(1, comparison_settings.training_episodes + 1):
        if self._is_record_episode(episode, first_recorded, recording_step):
            recorded_episodes.append(episode)
    episode_array = np.array(recorded_episodes, dtype=float)
    self.x_series = common.Series(title="Episode", values=episode_array)

    # One series per settings entry, read back from the recorder.
    for settings in self.comparison.settings_list:
        parameters = settings.algorithm_parameters
        recorded = [self._recorder[parameters, episode] for episode in episode_array]
        values = np.array(recorded, dtype=float)
        algorithm_type: common.AlgorithmType = parameters.algorithm_type
        title: str = self._trainer.algorithm_factory.get_algorithm_title(parameters)
        self.series_list.append(
            common.Series(
                title=title,
                identifiers={"algorithm_type": algorithm_type},
                values=values,
            )
        )
def compile(self):
    """Collate the recorded per-timestep values into one series per settings entry.

    Determines ``self._max_timestep``, sets ``self.x_series`` to the
    timesteps 0..max_timestep, then appends to ``self.series_list`` one
    ``common.Series`` per entry of ``self.comparison.settings_list`` with
    the values read from ``self._recorder``.
    """
    trainer_max = self._trainer.max_cum_timestep
    if trainer_max > 0:
        # Serial case: the trainer reports the maximum cumulative timestep.
        self._max_timestep = trainer_max
    else:
        # Parallel case: the maximum is not returned, so deduce it from the
        # second element of the recorder's tally keys.
        self._max_timestep = max(key[1] for key in self._recorder.tallies.keys())

    timestep_array = np.arange(self._max_timestep + 1, dtype=int)
    self.x_series = common.Series(title="Timestep", values=timestep_array)

    # One series per settings entry, read back from the recorder.
    for settings in self.comparison.settings_list:
        parameters = settings.algorithm_parameters
        algorithm_type: common.AlgorithmType = parameters.algorithm_type
        recorded = [self._recorder[algorithm_type, timestep] for timestep in timestep_array]
        values = np.array(recorded, dtype=float)
        title: str = self._trainer.algorithm_factory.get_algorithm_title(parameters)
        self.series_list.append(
            common.Series(
                title=title,
                identifiers={"algorithm_type": algorithm_type},
                values=values,
            )
        )
def compile(self):
    """Collate the recorded per-alpha values into one series per algorithm type.

    Sets ``self.x_series`` to the alphas in
    ``self.breakdown_parameters.alpha_list``, then appends to
    ``self.series_list`` one ``common.Series`` per entry of
    ``self.breakdown_parameters.algorithm_type_list`` with the values read
    from ``self._recorder``.
    """
    alphas = np.array(self.breakdown_parameters.alpha_list)
    self.x_series = common.Series(title="α", values=alphas)

    # One series per algorithm type, read back from the recorder.
    for algorithm_type in self.breakdown_parameters.algorithm_type_list:
        recorded = [
            self._recorder[algorithm_type, alpha]
            for alpha in self.breakdown_parameters.alpha_list
        ]
        values = np.array(recorded, dtype=float)
        name = self._trainer.algorithm_factory.get_algorithm_name(algorithm_type)
        self.series_list.append(
            common.Series(
                title=name,
                values=values,
                identifiers={"algorithm_type": algorithm_type},
            )
        )
def get_state_graph3d_values(self) -> common.Graph3DValues:
    """Build the 3D state-value surface over the car counts at both locations.

    For every pair (cars1, cars2) in 0..max_cars a non-terminal ``State`` is
    built; its index is looked up in ``environment.state_index`` and the
    matching entry of ``self.algorithm.V`` becomes the height of the surface.

    Returns:
        A shallow copy of ``self._comparison.graph3d_values`` with its x, y
        and z series filled in.
    """
    # Local import: only this block is visible, so the module-level imports
    # cannot be relied on to provide `copy`.
    import copy

    max_cars: int = self.environment.max_cars
    axis_length = max_cars + 1
    x_values = np.arange(axis_length, dtype=float)
    y_values = np.arange(axis_length, dtype=float)
    # Rows are indexed by cars at location 2 (y), columns by location 1 (x).
    z_values = np.empty(shape=(axis_length, axis_length), dtype=float)

    for cars1 in range(axis_length):
        for cars2 in range(axis_length):
            state: State = State(
                ending_cars_1=cars1,
                ending_cars_2=cars2,
                is_terminal=False,
            )
            s: int = self.environment.state_index[state]
            z_values[cars2, cars1] = self.algorithm.V[s]

    # Work on a copy: writing the series onto the comparison's own object
    # would mutate shared configuration and alias every returned graph.
    g: common.Graph3DValues = copy.copy(self._comparison.graph3d_values)
    g.x_series = common.Series(title=g.x_label, values=x_values)
    g.y_series = common.Series(title=g.y_label, values=y_values)
    g.z_series = common.Series(title=g.z_label, values=z_values)
    return g
def get_state_graph_values(self) -> common.Graph2DValues:
    """Build the 2D graph of the state value V(s) against capital.

    For every non-terminal state of the environment, the state's capital is
    the x value and the matching entry of ``self.algorithm.V`` is the y
    value.

    Returns:
        A new ``common.Graph2DValues`` titled "V(s)" holding one x series
        and one graph series, with fixed axis limits and display flags.
    """
    capitals: list[int] = []
    state_values: list[float] = []
    for s, state in enumerate(self.environment.states):
        if state.is_terminal:
            continue
        capitals.append(state.capital)
        state_values.append(self.algorithm.V[s])

    x_values = np.array(capitals, dtype=int)
    y_values = np.array(state_values, dtype=float)

    g: common.Graph2DValues = common.Graph2DValues()
    # Labels must be assigned before the series are built: the series titles
    # are read from them. Previously the series were created first and so
    # picked up the defaults of a fresh Graph2DValues instead.
    g.title = "V(s)"
    g.x_label = "Capital"
    g.y_label = "V(s)"
    g.x_series = common.Series(title=g.x_label, values=x_values)
    g.graph_series = [common.Series(title=g.y_label, values=y_values)]
    g.x_min = 0.0
    g.x_max = 100.0
    g.y_min = 0.0
    g.y_max = 1.0
    g.has_grid = True
    g.has_legend = False
    return g