def step(self, a):
    """Apply torque action ``a``, integrate the dynamics with rk4, and
    return (reward, next_state, terminal, possible_actions).

    Reward is -1 per step and 0 once a terminal state is reached.
    """
    torque = self.AVAIL_TORQUE[a]
    # Optionally perturb the commanded torque with uniform noise.
    if self.torque_noise_max > 0:
        torque += self.random_state.uniform(-self.torque_noise_max,
                                            self.torque_noise_max)
    # Append the action so _dsdt can read it alongside the state.
    s_augmented = np.append(self.state, torque)
    # rk4 yields one row per time point; keep only the final one and
    # drop the appended action component.  (scipy.integrate.odeint was
    # benchmarked as far too slow for this loop.)
    ns = rk4(self._dsdt, s_augmented, [0, self.dt])[-1][:4]
    # Wrap both joint angles into [-pi, pi]; clip angular velocities.
    ns[0] = wrap(ns[0], -np.pi, np.pi)
    ns[1] = wrap(ns[1], -np.pi, np.pi)
    ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
    ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
    self.state = ns.copy()
    terminal = self.isTerminal()
    reward = 0. if terminal else -1.
    return reward, ns, terminal, self.possibleActions()
def step(self, a):
    """Perform one transition under action ``a``.

    The state is advanced by rk4 over one ``dt`` interval, angles are
    wrapped, velocities clipped, and the reward is delegated to
    ``self._reward_function``.

    :return: (reward, next_state, terminal, possible_actions)
    """
    torque = self.AVAIL_TORQUE[a]
    if self.torque_noise_max > 0:
        noise = self.random_state.uniform(-self.torque_noise_max,
                                          self.torque_noise_max)
        torque += noise
    # State plus action, as expected by _dsdt.
    augmented = np.append(self.state, torque)
    trajectory = rk4(self._dsdt, augmented, [0, self.dt])
    ns = trajectory[-1][:4]  # final time point; action component dropped
    for k in (0, 1):
        ns[k] = wrap(ns[k], -np.pi, np.pi)
    ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
    ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
    self.state = ns.copy()
    terminal = self.isTerminal()
    reward = self._reward_function(terminal)
    return reward, ns, terminal, self.possibleActions()
def step(self, a):
    """Advance the domain one step under action ``a`` using four
    explicit-Euler sub-steps of length dt/4.

    Reward is -1 per step and 0 at a terminal state.

    :return: (reward, next_state, terminal, possible_actions)
    """
    torque = self.AVAIL_TORQUE[a]
    # Optionally add uniform noise to the applied torque.
    if self.torque_noise_max > 0:
        torque += self.random_state.uniform(-self.torque_noise_max,
                                            self.torque_noise_max)
    # State plus action, as expected by _dsdt.
    s_augmented = np.append(self.state, torque)
    # Four Euler sub-steps covering the full dt interval.
    for _ in range(4):
        s_augmented += np.array(self._dsdt(s_augmented, 0)) * self.dt / 4.
    # Wrap angles into [-pi, pi] and clip the angular velocities.
    s_augmented[0] = wrap(s_augmented[0], -np.pi, np.pi)
    s_augmented[1] = wrap(s_augmented[1], -np.pi, np.pi)
    s_augmented[2] = bound(s_augmented[2], -self.MAX_VEL_1, self.MAX_VEL_1)
    s_augmented[3] = bound(s_augmented[3], -self.MAX_VEL_2, self.MAX_VEL_2)
    ns = s_augmented[:4]  # drop the appended action
    self.state = ns.copy()
    terminal = self.isTerminal()
    reward = 0. if terminal else -1.
    return reward, ns, terminal, self.possibleActions()
def step(self, a):
    """Take action ``a``: integrate the dynamics with rk4, wrap and clip
    the new state, append the previous raw state to the trajectory, and
    consume goals from ``self.goalArray`` as they are reached.

    :return: (reward, next_state, terminal, possible_actions)
    """
    s = self.state[:4]
    torque = self.AVAIL_TORQUE[a]
    # Add noise to the force action.
    if self.torque_noise_max > 0:
        torque += self.random_state.uniform(-self.torque_noise_max,
                                            self.torque_noise_max)
    # Augment the state with the force action so it can be passed to _dsdt.
    s_augmented = np.append(s, torque)
    ns = rk4(self._dsdt, s_augmented, [0, self.dt])
    # Only the final timestep of integration matters; omit the action.
    ns = ns[-1]
    ns = ns[:4]
    ns[0] = wrap(ns[0], -np.pi, np.pi)
    ns[1] = wrap(ns[1], -np.pi, np.pi)
    ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
    ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
    self.prev_states.append(s)
    ns = self.augment_state(ns)
    self.state = ns.copy()
    terminal = self.isTerminal()
    if not terminal:
        # GOAL FUNCTION: pop every leading goal satisfied by the new raw
        # state.  (Fixed: this used to contain a Python-2 `print` and a
        # leftover `ipdb.set_trace()` breakpoint — debugger calls must
        # not ship; log the event instead.)
        while not self.isTerminal() and self.goalfn(ns[:4], self.goalArray[0]):
            self.goalArray = self.goalArray[1:]
            logging.info("Goal reached %d %s %s",
                         len(self.prev_states), self.goalArray,
                         self.encodingFunction(self.prev_states))
    # Default reward setup; a custom rewardFunction overrides it for
    # non-terminal steps.
    reward = self.STEP_REWARD if not terminal else self.GOAL_REWARD
    if not terminal and self.rewardFunction is not None:
        reward = self.rewardFunction(self.prev_states, self.goalArray,
                                     self.STEP_REWARD, self.GOAL_REWARD)
    return reward, ns, terminal, self.possibleActions()
def step(self, a):
    """Execute one cutter step: optionally turn, move forward along the
    heading, and advance the cloth simulation.

    :return: (reward, next_state, terminal, possible_actions)
    """
    self._stepcount += 1
    x, y, angle = self.state[:3]
    nx, ny, nangle = x, y, angle
    # Turning actions adjust the heading; any other action keeps it.
    if a == self.TURN_LEFT:
        nangle = angle + self.TURN_ANGLE
    elif a == self.TURN_RIGHT:
        nangle = angle - self.TURN_ANGLE
    nangle = wrap(nangle, self.ANGLE_MIN, self.ANGLE_MAX)
    # NOTE(review): movement uses the PRE-turn heading `angle`, so a turn
    # only affects motion from the next step on — confirm this is intended.
    nx = x + np.cos(angle) * self.dt
    ny = y + np.sin(angle) * self.dt
    # Drive the cloth simulation with the new cutter position.
    self.cloth_experiment.move_mouse(nx, ny)
    self.cloth_experiment.update()
    # State layout: [x, y, angle, step count].
    ns = np.hstack(([nx, ny, nangle], self._stepcount))
    self.state = ns.copy()
    terminal = self.isTerminal()
    r = self._reward_function(self.state, terminal)
    logging.debug("Reward: %f" % r)
    return r, ns, terminal, self.possibleActions()
def step(self, a):
    """Advance the car one Euler step of length ``delta_t``.

    ``a`` in [0, 8] encodes an (acceleration, steering) pair; each
    component is mapped to {-1, 0, 1}.  Position and speed are clipped
    to the domain box, heading is wrapped, and touching any wall zeroes
    the speed.

    :return: (reward, next_state, terminal, possible_actions)
    """
    x, y, speed, heading = self.state
    # Decode the scalar action into acceleration and wheel indicators.
    acc, turn = id2vec(a, [3, 3])
    acc -= 1   # map to [-1, 0, 1]
    turn -= 1  # map to [-1, 0, 1]
    # Kinematic bicycle model, explicit Euler.
    nx = x + speed * np.cos(heading) * self.delta_t
    ny = y + speed * np.sin(heading) * self.delta_t
    nspeed = speed + acc * self.ACCELERATION * self.delta_t
    nheading = heading + speed / self.CAR_LENGTH * \
        np.tan(turn * self.TURN_ANGLE) * self.delta_t
    # Clip position/speed into range; wrap heading.
    nx = bound(nx, self.XMIN, self.XMAX)
    ny = bound(ny, self.YMIN, self.YMAX)
    nspeed = bound(nspeed, self.SPEEDMIN, self.SPEEDMAX)
    nheading = wrap(nheading, self.HEADINGMIN, self.HEADINGMAX)
    # Collision with a wall stops the car.
    hit_wall = (nx == self.XMIN or nx == self.XMAX
                or ny == self.YMIN or ny == self.YMAX)
    if hit_wall:
        nspeed = 0
    ns = np.array([nx, ny, nspeed, nheading])
    self.state = ns.copy()
    terminal = self.isTerminal()
    r = self.GOAL_REWARD if terminal else self.STEP_REWARD
    return r, ns, terminal, self.possibleActions()
def step(self, a):
    """One Euler step of the car dynamics under composite action ``a``.

    The action index in [0, 8] is decoded into an acceleration and a
    steering component, each shifted to {-1, 0, 1}.  The next state is
    clipped/wrapped into the domain limits and the car stops when it
    ends up on a wall.

    :return: (reward, next_state, terminal, possible_actions)
    """
    x, y, speed, heading = self.state
    acc, turn = id2vec(a, [3, 3])
    acc, turn = acc - 1, turn - 1  # shift both components to {-1, 0, 1}
    dt = self.delta_t
    # Integrate and immediately clip each component.
    nx = bound(x + speed * np.cos(heading) * dt, self.XMIN, self.XMAX)
    ny = bound(y + speed * np.sin(heading) * dt, self.YMIN, self.YMAX)
    nspeed = bound(speed + acc * self.ACCELERATION * dt,
                   self.SPEEDMIN, self.SPEEDMAX)
    nheading = wrap(heading + speed / self.CAR_LENGTH
                    * np.tan(turn * self.TURN_ANGLE) * dt,
                    self.HEADINGMIN, self.HEADINGMAX)
    # Landing exactly on a boundary means a wall collision: stop the car.
    if nx in (self.XMIN, self.XMAX) or ny in (self.YMIN, self.YMAX):
        nspeed = 0
    ns = np.array([nx, ny, nspeed, nheading])
    self.state = ns.copy()
    terminal = self.isTerminal()
    r = self.GOAL_REWARD if terminal else self.STEP_REWARD
    return r, ns, terminal, self.possibleActions()
def _check_value_bounds(self, nx, ny, nspeed, nheading):
    """Clip position and speed into the domain box, wrap the heading,
    and pack the result as a state array [x, y, speed, heading]."""
    return np.array([
        bound(nx, self.XMIN, self.XMAX),
        bound(ny, self.YMIN, self.YMAX),
        bound(nspeed, self.SPEEDMIN, self.SPEEDMAX),
        wrap(nheading, self.HEADINGMIN, self.HEADINGMAX),
    ])
def step(self, a):
    """Advance one step with four explicit-Euler sub-steps of dt/4.

    Reward is -1 per non-terminal step and 0 at a terminal state.

    :return: (reward, next_state, terminal, possible_actions)
    """
    torque = self.AVAIL_TORQUE[a]
    # Add uniform noise to the torque when enabled.
    if self.torque_noise_max > 0:
        torque += self.random_state.uniform(-self.torque_noise_max,
                                            self.torque_noise_max)
    # _dsdt expects the action appended to the state vector.
    s_augmented = np.append(self.state, torque)
    for _ in range(4):
        derivative = np.array(self._dsdt(s_augmented, 0))
        s_augmented += derivative * self.dt / 4.
    # Keep both angles in [-pi, pi]; clip the angular velocities.
    for idx in (0, 1):
        s_augmented[idx] = wrap(s_augmented[idx], -np.pi, np.pi)
    s_augmented[2] = bound(s_augmented[2], -self.MAX_VEL_1, self.MAX_VEL_1)
    s_augmented[3] = bound(s_augmented[3], -self.MAX_VEL_2, self.MAX_VEL_2)
    ns = s_augmented[:4]  # omit the appended action
    self.state = ns.copy()
    terminal = self.isTerminal()
    return (0. if terminal else -1.), ns, terminal, self.possibleActions()
def _stepFourState(self, s, a):
    """Integrate the four-dimensional cartpole state ``s`` forward by
    ``self.dt`` under force action ``a`` and return the bounded result.

    ``self.state`` is NOT modified here — that is left to the ``step()``
    method of child classes.

    :param s: four-dimensional cartpole state
    :param a: index into ``self.AVAIL_FORCE``
    :return: next state as [theta, thetaDot, x, xDot]
    """
    force = self.AVAIL_FORCE[a]
    # Optional actuator noise on the applied force.
    if self.force_noise_max > 0:
        force += self.random_state.uniform(-self.force_noise_max,
                                           self.force_noise_max)
    # Append the force so _dsdt can read it alongside the state.
    s_augmented = np.append(s, force)
    # Integrator selection.  Benchmarks from the original implementation
    # (sample runtime / error vs scipy.integrate, the most accurate):
    #   euler (custom)            fastest, error untested
    #   rk4 (mlab)                3min 15s, < 0.01%
    #   pendulum_ode45 (L&P '03)  7min 15s, ~ 2.00%
    #   integrate.odeint (scipy)  9min 30s, reference (too slow here)
    if self.int_type == "euler":
        int_fun = self.euler_int
    elif self.int_type == "odeint":
        int_fun = scipy.integrate.odeint
    else:
        int_fun = rk4
    trajectory = int_fun(self._dsdt, s_augmented, [0, self.dt])
    # Only the final timestep matters; drop the appended force component.
    ns = trajectory[-1][0:4]
    # Wrap theta into [-pi, pi] first, then clip every component to its
    # configured limits.
    theta = wrap(ns[StateIndex.THETA], -np.pi, np.pi)
    ns[StateIndex.THETA] = bound(
        theta, self.ANGLE_LIMITS[0], self.ANGLE_LIMITS[1])
    ns[StateIndex.THETA_DOT] = bound(
        ns[StateIndex.THETA_DOT],
        self.ANGULAR_RATE_LIMITS[0], self.ANGULAR_RATE_LIMITS[1])
    ns[StateIndex.X] = bound(
        ns[StateIndex.X], self.POSITION_LIMITS[0], self.POSITION_LIMITS[1])
    ns[StateIndex.X_DOT] = bound(
        ns[StateIndex.X_DOT],
        self.VELOCITY_LIMITS[0], self.VELOCITY_LIMITS[1])
    return ns