Example 1
 def expectedStep(self, s, a):
     # Returns the k possible outcomes of taking action a in state s:
     #  p: k-by-1    probability of each transition
     #  r: k-by-1    reward of each transition
     # ns: k-by-|s|  next state of each transition
     #  t: k-by-1    terminal flag of each transition
     # pa: k entries  possible actions in each next state (length varies)
     actions = self.possibleActions(s)
     k = len(actions)
     # Transition probabilities: the intended action succeeds with
     # probability 1 - NOISE; otherwise one of the k possible actions
     # is taken uniformly at random.
     intended_action_index = findElemArray1D(a, actions)
     p = np.ones((k, 1)) * self.NOISE / (k * 1.)
     p[intended_action_index, 0] += 1 - self.NOISE
     # Make next states
     ns = np.tile(s, (k, 1)).astype(int)
     actions = self.ACTIONS[actions]
     ns += actions
     # Make next possible actions
     pa = np.array([self.possibleActions(sn) for sn in ns])
     # Make rewards
     r = np.ones((k, 1)) * self.STEP_REWARD
     goal = self.map[ns[:, 0], ns[:, 1]] == self.GOAL
     pit = self.map[ns[:, 0], ns[:, 1]] == self.PIT
     r[goal] = self.GOAL_REWARD
     r[pit] = self.PIT_REWARD
     # Make terminals
     t = np.zeros((k, 1), bool)
     t[goal] = True
     t[pit] = True
     return p, r, ns, t, pa
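The tuple returned above contains exactly the pieces needed for a one-step expected (model-based) backup. The sketch below shows one way to consume it; it is not part of the source, and `domain`, `V` (a state-value callable), and `discount_factor` are assumed names.

    def expected_q(domain, V, discount_factor, s, a):
        # Hypothetical helper (not from the source): expected one-step return
        # of taking action a in state s, weighting the k outcomes by p.
        p, r, ns, t, pa = domain.expectedStep(s, a)
        q = 0.0
        for i in range(len(p)):
            # Terminal transitions contribute no future value.
            v_next = 0.0 if t[i, 0] else V(ns[i], t[i, 0], pa[i])
            q += p[i, 0] * (r[i, 0] + discount_factor * v_next)
        return q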
Example 2
 def top(self, A, s):
     # Returns the block(s) on top of block A, or an empty array if nothing
     # is on top of A.
     on_A = findElemArray1D(A, s)
     # s[i] == i encodes that block i is on the table, so drop A itself.
     on_A = np.setdiff1d(on_A, [A])
     return on_A
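A small usage sketch, under the state encoding implied by the comments (s[i] is the block that block i rests on, and s[i] == i means block i is on the table); `domain` stands for a hypothetical instance of the class this method belongs to, and the array below is illustrative only.

    import numpy as np

    s = np.array([0, 0, 1])  # block 0 on the table, block 1 on 0, block 2 on 1
    domain.top(0, s)         # -> array([1]): block 1 is on top of block 0
    domain.top(2, s)         # -> empty array: nothing is on top of block 2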
Example 3
    def bestActions(self, s, terminal, p_actions, phi_s=None):
        """
        Returns a list of the best actions at a given state.
        If *phi_s* (the feature vector at state *s*) is given, it is used to
        speed up code by preventing re-computation within this function.

        See :py:meth:`~rlpy.Representations.Representation.Representation.bestAction`

        :param s: The given state
        :param terminal: Whether or not the state *s* is a terminal one.
        :param p_actions: The actions available in state *s*.
        :param phi_s: (optional) the feature vector at state *s*.
        :return: A list of the best actions at the given state.

        """
        Qs = self.Qs(s, terminal, phi_s)
        # Restrict the Q-values to the actions that are actually available.
        Qs = Qs[p_actions]
        # Find the indices of all actions tied for the maximum Q-value.
        ind = findElemArray1D(Qs, Qs.max())
        return np.array(p_actions)[ind]
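Because all tied greedy actions are returned, a caller typically breaks the tie at random. A minimal sketch, assuming `representation` is an instance exposing the method above and `s`, `terminal`, `p_actions` are already in scope:

    import numpy as np

    best = representation.bestActions(s, terminal, p_actions)
    # Break ties among the equally-valued greedy actions uniformly at random.
    a = np.random.choice(best)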