예제 #1
0
 def __init__(self):
     """Build the configuration objects used by this scenario.

     After delegating to the parent initialiser, this stores the car limit
     and creates four configuration objects on the instance: environment
     parameters, comparison settings, 3D-graph value settings and grid-view
     parameters.
     """
     super().__init__()

     # Car limit; the problem statement uses 20.
     self._max_cars: int = 20
     car_limit = self._max_cars

     # Flip extra_rules to switch the extra rules from the book's challenge.
     self._environment_parameters = EnvironmentParameters(
         max_cars=car_limit,
         extra_rules=True,
     )

     # Sub-configurations are named first so the Settings call stays flat.
     deterministic_policy = common.PolicyParameters(
         policy_type=common.PolicyType.TABULAR_DETERMINISTIC,
     )
     # theta is the accuracy of policy_evaluation.
     evaluation_accuracy = common.AlgorithmParameters(theta=0.1)
     self._comparison_settings = common.Settings(
         gamma=0.9,
         policy_parameters=deterministic_policy,
         algorithm_parameters=evaluation_accuracy,
         display_every_step=True,
     )

     # Both horizontal axes run from 0 up to the car limit.
     self._graph3d_values = common.Graph3DValues(
         x_label="Cars at 1st location",
         y_label="Cars at 2nd location",
         z_label="V(s)",
         x_min=0,
         x_max=car_limit,
         y_min=0,
         y_max=car_limit,
     )

     self._grid_view_parameters = common.GridViewParameters(
         grid_view_type=common.GridViewType.JACKS,
         show_result=True,
         show_policy=True,
     )
예제 #2
0
class Settings(common.Settings):
    """Settings for this example, declared as defaults on top of common.Settings.

    The values below are class-level annotated defaults. How they are turned
    into instance fields is decided by common.Settings, which is not visible
    here (NOTE(review): presumably a dataclass - confirm).
    """
    # gamma of 1.0; presumably the discount factor, i.e. no discounting
    # (confirm against common.Settings).
    gamma: float = 1.0
    # A single run of 500,000 training episodes.
    runs: int = 1
    training_episodes: int = 500_000
    # Presumably progress is printed every 10,000 episodes - confirm in the trainer.
    episode_print_frequency: int = 10_000
    # Use the tabular deterministic policy type.
    policy_parameters: common.PolicyParameters = common.PolicyParameters(
        policy_type=common.PolicyType.TABULAR_DETERMINISTIC, )
예제 #3
0
class Settings(common.Settings):
    """Settings for this example, declared as defaults on top of common.Settings.

    The values below are class-level annotated defaults. How they are turned
    into instance fields is decided by common.Settings, which is not visible
    here (NOTE(review): presumably a dataclass - confirm).
    """
    # 100 runs of 100 training episodes each.
    runs: int = 100
    # Disabled override, kept for reference:
    # runs_multiprocessing: common.ParallelContextType =common.ParallelContextType.FORK_GLOBAL
    training_episodes: int = 100
    # Policy type is TABULAR_NONE for this example.
    policy_parameters: common.PolicyParameters = common.PolicyParameters(
        policy_type=common.PolicyType.TABULAR_NONE)
    # The default is built from the unqualified AlgorithmParameters name, i.e.
    # whatever this module has in scope, not common.AlgorithmParameters directly.
    algorithm_parameters: common.AlgorithmParameters = AlgorithmParameters()
예제 #4
0
 def __init__(
     self,
     environment: TabularEnvironment,
     policy_parameters: common.PolicyParameters,
 ):
     """Initialise the policy and attach a deterministic greedy policy.

     Both arguments are forwarded unchanged to the parent initialiser.
     Afterwards ``epsilon`` is copied from ``self._policy_parameters`` and a
     ``Deterministic`` policy over ``self._environment`` is created with
     ``store_matrix=False``.

     Args:
         environment: Environment handed to the parent initialiser.
         policy_parameters: Parameters handed to the parent initialiser.
     """
     super().__init__(environment, policy_parameters)

     # _policy_parameters and _environment are presumably set by the parent
     # initialiser above; they are not assigned in this method.
     self.epsilon: float = self._policy_parameters.epsilon

     # The greedy policy gets its own parameters rather than reusing ours.
     self.greedy_policy: Deterministic = Deterministic(
         self._environment,
         common.PolicyParameters(
             policy_type=common.PolicyType.TABULAR_DETERMINISTIC,
             store_matrix=False,
         ),
     )
class Settings(common.Settings):
    """Settings for this example, declared as defaults on top of common.Settings.

    The values below are class-level annotated defaults. How they are turned
    into instance fields is decided by common.Settings, which is not visible
    here (NOTE(review): presumably a dataclass - confirm).
    """
    # gamma of 1.0; 0.99999 is noted as an alternative value.
    gamma: float = 1.0  # 0.99999
    # Use the tabular deterministic policy type.
    policy_parameters: common.PolicyParameters = common.PolicyParameters(
        policy_type=common.PolicyType.TABULAR_DETERMINISTIC, )
    # Per-step display is switched off here.
    display_every_step: bool = False