Example #1
    def step(self, a):
        s = self.state
        torque = self.AVAIL_TORQUE[a]

        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.random_state.uniform(-self.torque_noise_max,
                                                self.torque_noise_max)

        # Now, augment the state with our force action so it can be passed to
        # _dsdt
        s_augmented = np.append(s, torque)

        ns = rk4(self._dsdt, s_augmented, [0, self.dt])
        # only care about final timestep of integration returned by integrator
        ns = ns[-1]
        ns = ns[:4]  # omit action
        # ODEINT IS TOO SLOW!
        # ns_continuous = integrate.odeint(self._dsdt, self.s_continuous, [0, self.dt])
        # self.s_continuous = ns_continuous[-1] # We only care about the state
        # at the ''final timestep'', self.dt

        ns[0] = wrap(ns[0], -np.pi, np.pi)
        ns[1] = wrap(ns[1], -np.pi, np.pi)
        ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
        ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
        self.state = ns.copy()
        terminal = self.isTerminal()
        reward = self._reward_function(terminal)
        return reward, ns, terminal, self.possibleActions()
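These snippets integrate the equations of motion with a small fixed-step RK4 helper rather than `scipy.integrate.odeint` (see the commented-out block above). The actual `rk4` lives elsewhere in the project; the following is a minimal sketch of what such a helper typically looks like, assuming `derivs(y, t, *args)` returns the state derivative and the caller keeps only the last row.

    import numpy as np

    def rk4(derivs, y0, t, *args):
        """Classic 4th-order Runge-Kutta over the time points in t.

        Returns one row of state per time point; the step() methods above
        keep only the final row (the state at t[-1] == dt).
        """
        yout = np.zeros((len(t), len(y0)))
        yout[0] = y0
        for i in range(len(t) - 1):
            h = t[i + 1] - t[i]
            y = yout[i]
            k1 = np.asarray(derivs(y, t[i], *args))
            k2 = np.asarray(derivs(y + h / 2.0 * k1, t[i] + h / 2.0, *args))
            k3 = np.asarray(derivs(y + h / 2.0 * k2, t[i] + h / 2.0, *args))
            k4 = np.asarray(derivs(y + h * k3, t[i] + h, *args))
            yout[i + 1] = y + h / 6.0 * (k1 + 2.0 * k2 + 2.0 * k3 + k4)
        return yout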
Example #2
File: Acrobot.py Project: MLDL/rlpy
    def step(self, a):
        s = self.state
        torque = self.AVAIL_TORQUE[a]

        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.random_state.uniform(-self.torque_noise_max,
                                                self.torque_noise_max)

        # Now, augment the state with our force action so it can be passed to
        # _dsdt
        s_augmented = np.append(s, torque)

        ns = rk4(self._dsdt, s_augmented, [0, self.dt])
        # only care about final timestep of integration returned by integrator
        ns = ns[-1]
        ns = ns[:4]  # omit action
        # ODEINT IS TOO SLOW!
        # ns_continuous = integrate.odeint(self._dsdt, self.s_continuous, [0, self.dt])
        # self.s_continuous = ns_continuous[-1] # We only care about the state
        # at the ''final timestep'', self.dt

        ns[0] = wrap(ns[0], -np.pi, np.pi)
        ns[1] = wrap(ns[1], -np.pi, np.pi)
        ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
        ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
        self.state = ns.copy()
        terminal = self.isTerminal()
        reward = -1. if not terminal else 0.
        return reward, ns, terminal, self.possibleActions()
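The `wrap` and `bound` helpers used after integration are small utilities from the same codebase and are not shown in these examples. A minimal sketch of the assumed semantics: `wrap` maps an angle back into a circular range such as [-pi, pi], and `bound` clips a value (here the joint velocities) to a closed interval.

    def wrap(x, m, M):
        """Wrap x into the circular range [m, M], e.g. an angle into [-pi, pi]."""
        diff = M - m
        while x > M:
            x -= diff
        while x < m:
            x += diff
        return x

    def bound(x, m, M):
        """Clip x to the closed interval [m, M]."""
        return min(max(x, m), M)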
Example #3
    def step(self, a):
        s = self.state[:4]
        torque = self.AVAIL_TORQUE[a]

        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.random_state.uniform(-self.torque_noise_max, 
                                                self.torque_noise_max)

        # Now, augment the state with our force action so it can be passed to
        # _dsdt
        s_augmented = np.append(s, torque)

        ns = rk4(self._dsdt, s_augmented, [0, self.dt])
        # only care about final timestep of integration returned by integrator
        ns = ns[-1]
        ns = ns[:4]  # omit action
        # ODEINT IS TOO SLOW!
        # ns_continuous = integrate.odeint(self._dsdt, self.s_continuous, [0, self.dt])
        # self.s_continuous = ns_continuous[-1] # We only care about the state
        # at the ''final timestep'', self.dt

        ns[0] = wrap(ns[0], -np.pi, np.pi)
        ns[1] = wrap(ns[1], -np.pi, np.pi)
        ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
        ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)

        self.prev_states.append(s)  
        ns = self.augment_state(ns)
        self.state = ns.copy()
        
        terminal = self.isTerminal()

        if not terminal:

            ########### GOAL FUNCTION: Bound ############

            # Consume sub-goals as they are reached; guard against indexing
            # an empty goal list.
            while (not self.isTerminal() and len(self.goalArray) > 0
                   and self.goalfn(ns[:4], self.goalArray[0])):
                self.goalArray = self.goalArray[1:]
                print("Goal reached", len(self.prev_states), self.goalArray,
                      self.encodingFunction(self.prev_states))
                import ipdb; ipdb.set_trace()  # drop into the debugger when a sub-goal is reached

            ############################################

        # default reward setup
        reward = self.STEP_REWARD if not terminal else self.GOAL_REWARD

        if not terminal and self.rewardFunction is not None:
            reward = self.rewardFunction(self.prev_states,
                                         self.goalArray,
                                         self.STEP_REWARD,
                                         self.GOAL_REWARD)
        
        return reward, ns, terminal, self.possibleActions()
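Example #3 wires in two project-specific callbacks, `goalfn` (a sub-goal predicate) and `rewardFunction` (a custom reward), neither of which is shown here. The sketch below illustrates one plausible shape for them, matching the call sites above; the names, tolerance, and reward logic are hypothetical.

    import numpy as np

    def distance_goalfn(state, goal, tol=0.1):
        # Hypothetical sub-goal predicate matching goalfn(ns[:4], goalArray[0]):
        # the current (theta1, theta2, dtheta1, dtheta2) counts as "at" the goal
        # when it lies within a small L2 ball around the goal point.
        return np.linalg.norm(np.asarray(state) - np.asarray(goal)) < tol

    def simple_reward_function(prev_states, goal_array, step_reward, goal_reward):
        # Hypothetical reward callback matching rewardFunction(prev_states,
        # goalArray, STEP_REWARD, GOAL_REWARD): pay the goal reward once all
        # sub-goals have been consumed, the per-step penalty otherwise.
        return goal_reward if len(goal_array) == 0 else step_reward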
Example #4
    def step(self, a):
        d = self.d
        a = self.actions[a]
        s = np.hstack((self.pos_cm, self.theta, self.v_cm, self.dtheta))
        ns = rk4(dsdt, s, [0, self.dt], a, self.P, self.inertia, self.G,
                 self.U, self.lengths, self.masses, self.k1, self.k2)[-1]

        self.theta = ns[2:2 + d]
        self.v_cm = ns[2 + d:4 + d]
        self.dtheta = ns[4 + d:]
        self.pos_cm = ns[:2]
        return (self._reward(a), self.state, self.isTerminal(),
                self.possibleActions())
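Note the return convention shared by all of these `step` methods: `(reward, next_state, terminal, possible_actions)`, rather than the Gym-style `(observation, reward, done, info)`. A minimal rollout loop consuming that convention might look like the sketch below; the `domain.s0()` reset signature is assumed from the same rlpy-style interface.

    def run_episode(domain, policy, max_steps=1000):
        # Assumed rlpy-style interface: s0() resets and returns
        # (state, terminal, possible_actions); step(a) returns
        # (reward, next_state, terminal, possible_actions).
        s, terminal, p_actions = domain.s0()
        total_reward = 0.0
        for _ in range(max_steps):
            if terminal:
                break
            a = policy(s, p_actions)  # pick one of the currently possible actions
            r, s, terminal, p_actions = domain.step(a)
            total_reward += r
        return total_reward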