Example #1
    def __init__(self,
                 observation_space,
                 action_space,
                 name="MBIE Agent",
                 params={},
                 starting_policy=None):
        BaseAgent.__init__(self,
                           observation_space,
                           action_space,
                           name,
                           params=dict(MBIE_DEFAULTS, **params))

        # Policy Setup
        if starting_policy:
            self.predict_policy = starting_policy
        else:
            self.predict_policy = DiscreteTabularPolicy(self.observation_space,
                                                        self.action_space,
                                                        default_value=1 /
                                                        (1 - self.gamma))
        self.backup_lim = int(
            np.log(1 / (self.params['epsilon_one'] * (1 - self.gamma))) /
            (1 - self.gamma))
        self.policy_iterations = 0

        # Model Setup
        self.model = DiscreteTabularModel(
            observation_space,
            action_space,
            default_reward=self.params['max_reward'],
            limit=self.params['known_threshold'])

        self.learn_policy = self.predict_policy
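
Examples #1, #2, and #4 all cap planning with the same backup limit: the number of Bellman backups after which further value iteration changes the value estimates by less than epsilon_one under discount gamma (the policy's default value of 1 / (1 - gamma) is the matching optimistic upper bound on discounted return). A minimal standalone sketch of that formula, with assumed illustrative hyper-parameter values rather than the project's defaults:

# Standalone sketch of the backup-limit formula; epsilon_one and gamma are
# illustrative values, not the project's defaults.
import numpy as np

epsilon_one = 0.01
gamma = 0.95
backup_lim = int(np.log(1 / (epsilon_one * (1 - gamma))) / (1 - gamma))
print(backup_lim)  # -> 152 backups for these values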
Example #2
    def __init__(self,
                 observation_space,
                 action_space,
                 name="RMax Agent",
                 params=None,
                 starting_policy=None):
        BaseAgent.__init__(self, observation_space, action_space, name)

        # Hyper-parameters
        self.params = dict(RMAX_DEFAULTS)
        if params:
            for key, value in params.items():
                self.params[key] = value
        self.max_reward = self.params['max_reward']
        self.epsilon_one = self.params['epsilon_one']
        self.known_threshold = self.params['known_threshold']
        self.gamma = self.params['gamma']
        #self.max_reward = 1 / (1 - self.gamma)

        # Policy Setup
        self.starting_policy = starting_policy
        self.backup_lim = int(
            np.log(1 / (self.epsilon_one * (1 - self.gamma))) /
            (1 - self.gamma))
        self.stepwise_backup_steps = 1  # self.backup_lim
        self.episodic_backup_steps = min(self.backup_lim, 5)

        # Model Setup
        self.model = KnownTabularModel(action_space.n, self.max_reward,
                                       self.known_threshold)

        self.reset()
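
Example #2 merges user overrides into RMAX_DEFAULTS with an explicit loop, whereas Examples #1 and #3 pass dict(DEFAULTS, **params) straight to BaseAgent; once the loop iterates params.items(), the two styles produce the same merged dictionary. A minimal sketch of that equivalence, with assumed default values:

# Sketch of the two merge styles; these RMAX_DEFAULTS values are assumed for
# illustration and are not taken from the project.
RMAX_DEFAULTS = {'max_reward': 1.0, 'epsilon_one': 0.99,
                 'known_threshold': 10, 'gamma': 0.95}
overrides = {'gamma': 0.99}

merged_loop = dict(RMAX_DEFAULTS)
for key, value in overrides.items():
    merged_loop[key] = value

merged_inline = dict(RMAX_DEFAULTS, **overrides)  # style used in Examples #1 and #3
assert merged_loop == merged_inline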
Example #3
    def __init__(self, observation_space, action_space, name="Q-Learning Agent", parameters={}, starting_policy=None):
        BaseAgent.__init__(self, observation_space, action_space, name, params=dict(QLEARNING_CONSTS, **parameters))

        # Policy Setup
        if starting_policy:
            self.predict_policy = starting_policy
        else:
            self.predict_policy = DiscreteTabularPolicy(self.observation_space, self.action_space, default_value=1/(1-self.gamma))
        self.learn_policy = EpsilonGreedy(
                action_space=self.action_space,
                policy=self.predict_policy,
                epsilon=self.epsilon
            )
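
The Q-learning agent keeps the greedy tabular policy for prediction and wraps it in EpsilonGreedy for learning, so exploration only affects action selection during training. The project's EpsilonGreedy class is not shown here; a minimal sketch of the behaviour it presumably implements:

# Minimal epsilon-greedy sketch; this approximates what EpsilonGreedy likely
# does and is not the project's actual implementation.
import numpy as np

def epsilon_greedy_action(q_values, epsilon):
    """With probability epsilon take a random action, otherwise the greedy one."""
    if np.random.random() < epsilon:
        return int(np.random.randint(len(q_values)))
    return int(np.argmax(q_values))

print(epsilon_greedy_action(np.array([0.1, 0.7, 0.3]), epsilon=0.1))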
Example #4
    def __init__(self,
                 observation_space,
                 action_space,
                 name="UCBVI Agent",
                 params=None,
                 starting_policy=None):
        BaseAgent.__init__(self, observation_space, action_space, name)

        # Hyper-parameters
        self.params = dict(UCBVI_DEFAULTS)
        if params:
            for key, value in params.items():
                self.params[key] = value
        self.max_reward = self.params['max_reward']
        self.epsilon_one = self.params['epsilon_one']
        self.known_threshold = self.params['known_threshold']
        self.gamma = self.params['gamma']
        #self.max_reward = 1 / (1 - self.gamma)
        self.delta = self.params['delta']

        # Policy Setup
        self.starting_policy = starting_policy
        self.backup_lim = int(
            np.log(1 / (self.epsilon_one * (1 - self.gamma))) /
            (1 - self.gamma))
        self.stepwise_backup_steps = 0
        self.episodic_backup_steps = self.backup_lim
        self.policy_iterations = 0

        # Model Setup
        self.model = DiscreteTabularModel(observation_space,
                                          action_space,
                                          default_reward=self.max_reward,
                                          limit=self.known_threshold)

        # Experience Tracking
        self.last_episode = []
        # self.last_episode_model = KnownTabularModel(action_space.n, self.max_reward, 1)

        self.reset()
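
Unlike the other agents, the UCBVI agent also stores a confidence parameter delta. The snippet does not show how delta is used, but UCBVI-style algorithms typically feed it into a count-based exploration bonus that shrinks as a state-action pair is visited; an illustrative sketch of such a bonus (the exact form and constants are an assumption, not the project's code):

# Illustrative Hoeffding-style bonus; the exact constants and form are
# assumptions, not taken from the original project.
import numpy as np

def exploration_bonus(visit_count, horizon, num_states, num_actions, total_steps, delta):
    """Optimism bonus that decays roughly as 1/sqrt(N(s, a))."""
    log_term = np.log(num_states * num_actions * max(total_steps, 1) / delta)
    return horizon * np.sqrt(log_term / max(visit_count, 1))

print(exploration_bonus(visit_count=4, horizon=10, num_states=16,
                        num_actions=4, total_steps=1000, delta=0.05))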