예제 #1
0
 def create_option(self, subgoal):
     """Register a new option that plans towards ``subgoal``.

     The option is numbered with the current size of ``self.options``.
     A ``ValueIteration`` planner is built from the value returned by
     ``self.fa.evaluate(subgoal.state)`` (passed as a one-element list),
     wrapped in a ``VIPolicy`` and stored in ``self.options`` as a new
     ``Option``. Finally a zero column vector with
     ``self.fa.num_features`` rows is appended to ``self.intrinsic``.

     Args:
         subgoal: Object exposing a ``state`` attribute; it is also
             handed to the ``Option`` constructor unchanged.
     """
     # Next free slot; avoids shadowing the builtin ``id``.
     option_id = len(self.options)

     # Per the original description this acts as the pseudo reward the
     # planner optimises for -- exact semantics live in ``self.fa``.
     pseudo_reward = self.fa.evaluate(subgoal.state)

     # The planner is created before the option is registered, so it sees
     # ``self.options`` without the new entry.
     planner = ValueIteration(
         option_id,
         [pseudo_reward],
         self,
         self.plan_iterations,
         alpha=self.alpha,
         gamma=self.gamma,
     )
     option_policy = VIPolicy(self.num_actions, planner)

     new_option = Option(
         option_id,
         self.fa,
         option_policy,
         self.eta,
         self.gamma,
         subgoal,
         self.num_actions,
     )
     self.options[option_id] = new_option

     zero_column = np.zeros((self.fa.num_features, 1))
     self.intrinsic.append(zero_column)
예제 #2
0
 def __init__(self,
              policy,
              fa,
              num_actions,
              alpha,
              gamma,
              eta,
              zeta,
              epsilon,
              plan_iter,
              sim_samples,
              sim_steps,
              retain_theta=True,
              subgoals=None,
              samples=None):
     """Set up the agent state, one primitive option per action and a planner.

     Args:
         policy: Stored as ``self.policy``.
         fa: Function approximator; must expose ``num_features``. Shared
             with every ``Option`` created here.
         num_actions: Number of primitive actions; one ``Option`` wrapping
             a ``FixedPolicy`` is created for each index in
             ``range(num_actions)``.
         alpha: Stored and forwarded to ``ValueIteration``.
         gamma: Stored and forwarded to ``ValueIteration`` and ``Option``.
         eta: Stored and forwarded to ``Option``.
         zeta: Stored as ``self.zeta``.
         epsilon: Stored as ``self.epsilon``.
         plan_iter: Stored as ``self.plan_iterations`` and forwarded to
             ``ValueIteration``.
         sim_samples: Stored as ``self.sim_samples``.
         sim_steps: Stored as ``self.sim_steps``.
         retain_theta: Forwarded to ``ValueIteration``.
         subgoals: Optional list of subgoals. A caller-supplied list is
             stored as-is (not copied); when omitted, a fresh empty list
             is created for this instance.
         samples: Optional list of samples, handled like ``subgoals``.
     """
     self.policy = policy
     self.fa = fa
     self.num_actions = num_actions
     self.alpha = alpha
     self.gamma = gamma
     self.eta = eta
     self.zeta = zeta
     self.epsilon = epsilon
     self.plan_iterations = plan_iter
     self.sim_samples = sim_samples
     self.sim_steps = sim_steps
     # A ``[]`` default would be one list shared by every instance built
     # without the argument; allocate a new list per instance instead.
     self.subgoals = [] if subgoals is None else subgoals
     self.samples = [] if samples is None else samples
     self.reached_subgoals = []
     self.extrinsic = None
     # One independent array per primitive action. ``[arr] * n`` would put
     # the *same* array object in every slot, so an in-place update to one
     # entry would silently change all of them.
     self.intrinsic = [
         np.ones((self.fa.num_features, 1)) for _ in range(num_actions)
     ]
     # Primitive options: option ``i`` follows ``FixedPolicy(num_actions, i)``
     # and has no subgoal.
     self.options = {
         i: Option(i, fa, FixedPolicy(num_actions, i), eta, gamma, None,
                   num_actions)
         for i in range(num_actions)
     }
     # Planner with id -1 that is given the ``self.intrinsic`` list itself,
     # so entries appended to that list later are visible to it.
     self.vi = ValueIteration(-1,
                              self.intrinsic,
                              self,
                              plan_iter,
                              retain_theta=retain_theta,
                              use_options=False,
                              alpha=alpha,
                              gamma=gamma)
     self.vi_policy = VIPolicy(num_actions, self.vi)
     self.option_stack = []
     self.step = 0
     self.viz = None