Example 1
File: Acrobot.py Project: MLDL/rlpy
    def step(self, a):
        s = self.state
        torque = self.AVAIL_TORQUE[a]

        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.random_state.uniform(-self.torque_noise_max,
                                                self.torque_noise_max)

        # Now, augment the state with our force action so it can be passed to
        # _dsdt
        s_augmented = np.append(s, torque)

        ns = rk4(self._dsdt, s_augmented, [0, self.dt])
        # only care about final timestep of integration returned by integrator
        ns = ns[-1]
        ns = ns[:4]  # omit action
        # ODEINT IS TOO SLOW!
        # ns_continuous = integrate.odeint(self._dsdt, self.s_continuous, [0, self.dt])
        # self.s_continuous = ns_continuous[-1] # We only care about the state
        # at the ''final timestep'', self.dt

        ns[0] = wrap(ns[0], -np.pi, np.pi)
        ns[1] = wrap(ns[1], -np.pi, np.pi)
        ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
        ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
        self.state = ns.copy()
        terminal = self.isTerminal()
        reward = -1. if not terminal else 0.
        return reward, ns, terminal, self.possibleActions()
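
All of these snippets lean on small numeric helpers from the project's utility module. Their implementations are not part of this listing; the sketch below only illustrates the assumed behavior: wrap folds an angle back into a periodic interval, and bound clips a value to a closed range.

    def wrap(x, low, high):
        # Fold x into [low, high) by shifting it by multiples of the interval
        # width, e.g. keep a joint angle within [-pi, pi). Sketch only; the
        # real rlpy helper may handle edge cases differently.
        width = high - low
        return low + (x - low) % width

    def bound(x, low, high):
        # Clip x to the closed interval [low, high].
        return max(low, min(x, high))

The rk4 call returns the integrated trajectory over the supplied time points, which is why each example keeps only ns[-1], the state at the final timestep self.dt.
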
Example 2
    def step(self, a):
        s = self.state
        torque = self.AVAIL_TORQUE[a]

        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.random_state.uniform(-self.torque_noise_max,
                                                self.torque_noise_max)

        # Now, augment the state with our force action so it can be passed to
        # _dsdt
        s_augmented = np.append(s, torque)

        ns = rk4(self._dsdt, s_augmented, [0, self.dt])
        # only care about final timestep of integration returned by integrator
        ns = ns[-1]
        ns = ns[:4]  # omit action
        # ODEINT IS TOO SLOW!
        # ns_continuous = integrate.odeint(self._dsdt, self.s_continuous, [0, self.dt])
        # self.s_continuous = ns_continuous[-1] # We only care about the state
        # at the ''final timestep'', self.dt

        ns[0] = wrap(ns[0], -np.pi, np.pi)
        ns[1] = wrap(ns[1], -np.pi, np.pi)
        ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
        ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
        self.state = ns.copy()
        terminal = self.isTerminal()
        reward = self._reward_function(terminal)
        return reward, ns, terminal, self.possibleActions()
Example 3
    def step(self, a):

        torque = self.AVAIL_TORQUE[a]
        s = self.state

        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.random_state.uniform(-self.torque_noise_max,
                                                self.torque_noise_max)

        s_augmented = np.append(s, torque)
        for i in range(4):
            s_dot = np.array(self._dsdt(s_augmented, 0))
            s_augmented += s_dot * self.dt / 4.

            # make sure that we don't have 2 free pendulums but a "gymnast"
            # for k in range(2):
            #    if np.abs(s_augmented[k]) > np.pi:
            #        s_augmented[k] = np.sign(s_augmented[k]) * np.pi
            #        s_augmented[k + 2] = 0.
            s_augmented[0] = wrap(s_augmented[0], -np.pi, np.pi)
            s_augmented[1] = wrap(s_augmented[1], -np.pi, np.pi)
            s_augmented[2] = bound(s_augmented[2], -self.MAX_VEL_1,
                                   self.MAX_VEL_1)
            s_augmented[3] = bound(s_augmented[3], -self.MAX_VEL_2,
                                   self.MAX_VEL_2)

        ns = s_augmented[:4]  # omit action
        self.state = ns.copy()
        terminal = self.isTerminal()
        reward = -1. if not terminal else 0.
        return reward, ns, terminal, self.possibleActions()
Example 4
    def step(self, a):
        s = self.state[:4]
        torque = self.AVAIL_TORQUE[a]

        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.random_state.uniform(-self.torque_noise_max, 
                                                self.torque_noise_max)

        # Now, augment the state with our force action so it can be passed to
        # _dsdt
        s_augmented = np.append(s, torque)

        ns = rk4(self._dsdt, s_augmented, [0, self.dt])
        # only care about final timestep of integration returned by integrator
        ns = ns[-1]
        ns = ns[:4]  # omit action
        # ODEINT IS TOO SLOW!
        # ns_continuous = integrate.odeint(self._dsdt, self.s_continuous, [0, self.dt])
        # self.s_continuous = ns_continuous[-1] # We only care about the state
        # at the ''final timestep'', self.dt

        ns[0] = wrap(ns[0], -np.pi, np.pi)
        ns[1] = wrap(ns[1], -np.pi, np.pi)
        ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
        ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)

        self.prev_states.append(s)  
        ns = self.augment_state(ns)
        self.state = ns.copy()
        
        terminal = self.isTerminal()

        if not terminal:

            ########### GOAL FUNCTION: Bound ############

            while not self.isTerminal() and self.goalfn(ns[:4], self.goalArray[0]): 
                self.goalArray = self.goalArray[1:]
                print "Goal reached", len(self.prev_states), self.goalArray, self.encodingFunction(self.prev_states)
                import ipdb; ipdb.set_trace()

            ############################################

        # default reward setup
        reward = self.STEP_REWARD if not terminal else self.GOAL_REWARD


        if not terminal and self.rewardFunction is not None:
            reward = self.rewardFunction(self.prev_states, 
                                    self.goalArray, 
                                    self.STEP_REWARD, 
                                    self.GOAL_REWARD)
        
        return reward, ns, terminal, self.possibleActions()
Example 5
	def step(self, a):

		self._stepcount += 1
		# a = 0
		# if self._runcount % 20 == 0:
		# 	self.cloth_experiment.render = True
		# 	logging.info("%d steps..." % self._stepcount)
		# else:
		# 	self.cloth_experiment.render = False
		x, y, angle = self.state[:3]
		nx, ny, nangle = x, y, angle
		if a == self.TURN_LEFT:
			nangle = angle + self.TURN_ANGLE
		elif a == self.TURN_RIGHT:
			nangle = angle - self.TURN_ANGLE
		# elif a == self.CUT_FORWARD:
		# 	logging.debug("straight")
		nangle = wrap(nangle, self.ANGLE_MIN, self.ANGLE_MAX)
		nx = x + np.cos(angle) * self.dt
		ny = y + np.sin(angle) * self.dt
		# else:
		# 	assert False, "Unknown action taken..."
		## modify environment
		self.cloth_experiment.move_mouse(nx, ny)
		self.cloth_experiment.update()

		ns = np.hstack(([nx, ny, nangle], self._stepcount))
		self.state = ns.copy()
		terminal = self.isTerminal()
		r = self._reward_function(self.state, terminal)
		# logging.debug("Reference: (%d, %d) \t Actual  (%d, %d)" % tuple(np.hstack((self.reference_trajectory[self._stepcount], ns[:2]))))
		logging.debug("Reward: %f" % r)
		return r, ns, terminal, self.possibleActions()
Example 6
    def step(self, a):
        x, y, speed, heading = self.state
        # Map a number in [0, 8] to a pair: the first element is the
        # acceleration direction, the second is the steering direction.
        acc, turn = id2vec(a, [3, 3])
        acc -= 1  # Map acc to [-1, 0, 1]
        turn -= 1  # Map turn to [-1, 0, 1]

        # Calculate next state
        nx = x + speed * np.cos(heading) * self.delta_t
        ny = y + speed * np.sin(heading) * self.delta_t
        nspeed = speed + acc * self.ACCELERATION * self.delta_t
        nheading = heading + speed / self.CAR_LENGTH * \
            np.tan(turn * self.TURN_ANGLE) * self.delta_t

        # Bound values
        nx = bound(nx, self.XMIN, self.XMAX)
        ny = bound(ny, self.YMIN, self.YMAX)
        nspeed = bound(nspeed, self.SPEEDMIN, self.SPEEDMAX)
        nheading = wrap(nheading, self.HEADINGMIN, self.HEADINGMAX)

        # Collision with a wall => set the speed to zero
        if nx == self.XMIN or nx == self.XMAX or ny == self.YMIN or ny == self.YMAX:
            nspeed = 0

        ns = np.array([nx, ny, nspeed, nheading])
        self.state = ns.copy()
        terminal = self.isTerminal()
        r = self.GOAL_REWARD if terminal else self.STEP_REWARD
        return r, ns, terminal, self.possibleActions()
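
The id2vec helper above decodes the flat action index a into an (acceleration, steering) pair before the -1 offsets shift each component into {-1, 0, 1}. Its implementation is not shown in the listing; a plausible row-major (mixed-radix) decoding with the same call shape would be:

    def id2vec(ind, limits):
        # Decode a flat index into one digit per dimension; limits gives the
        # number of values along each dimension (e.g. [3, 3] covers 9 actions).
        # Sketch only - the actual rlpy helper may order dimensions differently.
        digits = []
        for base in reversed(limits):
            digits.append(ind % base)
            ind //= base
        return list(reversed(digits))

For instance, id2vec(5, [3, 3]) yields [1, 2], which the subsequent offsets map to acceleration 0 and turn +1.
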
Example 7
    def step(self, a):
        x, y, speed, heading = self.state
        # Map a number in [0, 8] to a pair: the first element is the
        # acceleration direction, the second is the steering direction.
        acc, turn = id2vec(a, [3, 3])
        acc -= 1  # Map acc to [-1, 0, 1]
        turn -= 1  # Map turn to [-1, 0, 1]

        # Calculate next state
        nx = x + speed * np.cos(heading) * self.delta_t
        ny = y + speed * np.sin(heading) * self.delta_t
        nspeed = speed + acc * self.ACCELERATION * self.delta_t
        nheading = heading + speed / self.CAR_LENGTH * np.tan(turn * self.TURN_ANGLE) * self.delta_t

        # Bound values
        nx = bound(nx, self.XMIN, self.XMAX)
        ny = bound(ny, self.YMIN, self.YMAX)
        nspeed = bound(nspeed, self.SPEEDMIN, self.SPEEDMAX)
        nheading = wrap(nheading, self.HEADINGMIN, self.HEADINGMAX)

        # Collision with a wall => set the speed to zero
        if nx == self.XMIN or nx == self.XMAX or ny == self.YMIN or ny == self.YMAX:
            nspeed = 0

        ns = np.array([nx, ny, nspeed, nheading])
        self.state = ns.copy()
        terminal = self.isTerminal()
        r = self.GOAL_REWARD if terminal else self.STEP_REWARD
        return r, ns, terminal, self.possibleActions()
Example 8
    def _check_value_bounds(self, nx, ny, nspeed, nheading):
        nx = bound(nx, self.XMIN, self.XMAX)
        ny = bound(ny, self.YMIN, self.YMAX)
        nspeed = bound(nspeed, self.SPEEDMIN, self.SPEEDMAX)
        nheading = wrap(nheading, self.HEADINGMIN, self.HEADINGMAX)
        ns = np.array([nx, ny, nspeed, nheading])
        return ns
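
Example 8 factors the bounding logic of Examples 6 and 7 into a helper; presumably the calling step() would replace the four bound/wrap lines and the construction of ns with something like ns = self._check_value_bounds(nx, ny, nspeed, nheading) before the collision check.
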
Example 9
File: Acrobot.py Project: MLDL/rlpy
    def step(self, a):

        torque = self.AVAIL_TORQUE[a]
        s = self.state

        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.random_state.uniform(-self.torque_noise_max,
                                                self.torque_noise_max)

        s_augmented = np.append(s, torque)
        for i in range(4):
            s_dot = np.array(self._dsdt(s_augmented, 0))
            s_augmented += s_dot * self.dt / 4.

            # make sure that we don't have 2 free pendulums but a "gymnast"
            # for k in range(2):
            #    if np.abs(s_augmented[k]) > np.pi:
            #        s_augmented[k] = np.sign(s_augmented[k]) * np.pi
            #        s_augmented[k + 2] = 0.
            s_augmented[0] = wrap(s_augmented[0], -np.pi, np.pi)
            s_augmented[1] = wrap(s_augmented[1], -np.pi, np.pi)
            s_augmented[2] = bound(
                s_augmented[2],
                -self.MAX_VEL_1,
                self.MAX_VEL_1)
            s_augmented[3] = bound(
                s_augmented[3],
                -self.MAX_VEL_2,
                self.MAX_VEL_2)

        ns = s_augmented[:4]  # omit action
        self.state = ns.copy()
        terminal = self.isTerminal()
        reward = -1. if not terminal else 0.
        return reward, ns, terminal, self.possibleActions()
Example 10
    def _stepFourState(self, s, a):
        """
        :param s: the four-dimensional cartpole state
        Performs a single step of the CartPoleDomain without modifying
        ``self.state`` - this is left to the ``step()`` function in child
        classes.
        """

        forceAction = self.AVAIL_FORCE[a]

        # Add noise to the force action
        if self.force_noise_max > 0:
            forceAction += self.random_state.uniform(-self.force_noise_max,
                                                      self.force_noise_max)

        # Now, augment the state with our force action so it can be passed to
        # _dsdt
        s_augmented = np.append(s, forceAction)

        #-------------------------------------------------------------------------#
        # There are several ways of integrating the nonlinear dynamics equations. #
        # For consistency with prior results, we tested the custom integration    #
        # method developed by Lagoudakis & Parr (2003) for their Pendulum Inverted #
        # Balance task.                                                           #
        # For our own experiments we use rk4 from mlab or euler integration.      #
        #                                                                         #
        # Integration method         Sample runtime  Error with scipy.integrate   #
        #                                              (most accurate method)     #
        #  euler (custom)              ?min ??sec              ????%              #
        #  rk4 (mlab)                  3min 15sec            < 0.01%              #
        #  pendulum_ode45 (L&P '03)    7min 15sec            ~ 2.00%              #
        #  integrate.odeint (scipy)    9min 30sec            -------              #
        #                                                                         #
        # Use of these methods is supported by selectively commenting             #
        # sections below.                                                         #
        #-------------------------------------------------------------------------#
        if self.int_type == "euler":
            int_fun = self.euler_int
        elif self.int_type == "odeint":
            int_fun = scipy.integrate.odeint
        else:
            int_fun = rk4
        ns = int_fun(self._dsdt, s_augmented, [0, self.dt])
        # only care about final timestep of integration returned by integrator
        ns = ns[-1]

        ns = ns[0:4]  # [theta, thetadot, x, xDot]
        # ODEINT IS TOO SLOW!
        # ns_continuous = integrate.odeint(self._dsdt, self.s_continuous, [0, self.dt])
        # self.s_continuous = ns_continuous[-1] # We only care about the state
        # at the ''final timestep'', self.dt

        theta = wrap(ns[StateIndex.THETA], -np.pi, np.pi)
        ns[StateIndex.THETA] = bound(
            theta,
            self.ANGLE_LIMITS[0],
            self.ANGLE_LIMITS[1])
        ns[StateIndex.THETA_DOT] = bound(
            ns[StateIndex.THETA_DOT],
            self.ANGULAR_RATE_LIMITS[0],
            self.ANGULAR_RATE_LIMITS[1])
        ns[StateIndex.X] = bound(
            ns[StateIndex.X],
            self.POSITION_LIMITS[0],
            self.POSITION_LIMITS[1])
        ns[StateIndex.X_DOT] = bound(
            ns[StateIndex.X_DOT],
            self.VELOCITY_LIMITS[0],
            self.VELOCITY_LIMITS[1])

        return ns
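
The euler branch above dispatches to self.euler_int, whose body is not included in this listing. A minimal fixed-step Euler integrator matching the rk4 call signature used here (derivative function, initial augmented state, list of time points) and returning the full trajectory, so that ns = ns[-1] still selects the final state, might look like the following sketch (shown as a free function; the actual method may differ):

    import numpy as np

    def euler_int(derivs, y0, t):
        # Explicit Euler over the given time points. Returns one row per time
        # point so the caller can keep only the last one, as with rk4/odeint.
        ys = [np.asarray(y0, dtype=float)]
        for i in range(len(t) - 1):
            dt = t[i + 1] - t[i]
            y = ys[-1]
            ys.append(y + dt * np.asarray(derivs(y, t[i])))
        return np.array(ys)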