def step(self, a):
    """Advance the pendulum simulation by one control interval.

    Parameters: a -- index into AVAIL_TORQUE selecting the applied torque.
    Returns (reward, next_state, terminal, possible_actions).
    """
    torque = self.AVAIL_TORQUE[a]
    # Optionally perturb the commanded torque with uniform noise.
    if self.torque_noise_max > 0:
        torque += self.random_state.uniform(
            -self.torque_noise_max, self.torque_noise_max)
    # Append the torque to the state so the dynamics callback can read it.
    augmented = np.append(self.state, torque)
    # Integrate over [0, dt]; keep only the final timestep returned by the
    # integrator and drop the appended action component.
    # (scipy.integrate.odeint was tried here historically but was too slow.)
    next_state = rk4(self._dsdt, augmented, [0, self.dt])[-1][:4]
    # Wrap the joint angles into [-pi, pi] and clip both angular velocities.
    next_state[0] = wrap(next_state[0], -np.pi, np.pi)
    next_state[1] = wrap(next_state[1], -np.pi, np.pi)
    next_state[2] = bound(next_state[2], -self.MAX_VEL_1, self.MAX_VEL_1)
    next_state[3] = bound(next_state[3], -self.MAX_VEL_2, self.MAX_VEL_2)
    self.state = next_state.copy()
    terminal = self.isTerminal()
    reward = self._reward_function(terminal)
    return reward, next_state, terminal, self.possibleActions()
def step(self, a):
    """Advance the pendulum simulation one step under action index *a*.

    Reward is -1 per step until the terminal state is reached (then 0).
    Returns (reward, next_state, terminal, possible_actions).
    """
    torque = self.AVAIL_TORQUE[a]
    # Inject uniform torque noise when configured.
    if self.torque_noise_max > 0:
        torque += self.random_state.uniform(
            -self.torque_noise_max, self.torque_noise_max)
    # Augment the state with the action so _dsdt can access it, then
    # integrate over one control interval. Only the last timestep of the
    # integration matters; the trailing action slot is discarded.
    # (odeint proved too slow here, hence the fixed-step rk4.)
    augmented = np.append(self.state, torque)
    nxt = rk4(self._dsdt, augmented, [0, self.dt])[-1][:4]
    # Normalize angles and clip angular velocities to their limits.
    nxt[0] = wrap(nxt[0], -np.pi, np.pi)
    nxt[1] = wrap(nxt[1], -np.pi, np.pi)
    nxt[2] = bound(nxt[2], -self.MAX_VEL_1, self.MAX_VEL_1)
    nxt[3] = bound(nxt[3], -self.MAX_VEL_2, self.MAX_VEL_2)
    self.state = nxt.copy()
    terminal = self.isTerminal()
    if terminal:
        reward = 0.
    else:
        reward = -1.
    return reward, nxt, terminal, self.possibleActions()
def step(self, a):
    """Advance the simulation one step and track subgoal progress.

    Parameters: a -- index into AVAIL_TORQUE selecting the applied torque.
    Returns (reward, next_state, terminal, possible_actions).

    Fixes over the previous version: removed the leftover ipdb breakpoint
    (which halted any non-interactive run), converted the Python 2 print
    statement to the print() function, guarded against indexing an empty
    goalArray, and used `is not None` for the reward-function check.
    """
    s = self.state[:4]
    torque = self.AVAIL_TORQUE[a]
    # Optionally perturb the commanded torque with uniform noise.
    if self.torque_noise_max > 0:
        torque += self.random_state.uniform(-self.torque_noise_max,
                                            self.torque_noise_max)
    # Augment the state with the action so it can be passed to _dsdt,
    # then integrate over one control interval. Only the final timestep
    # matters; the trailing action slot is discarded.
    # (scipy's odeint was too slow here, hence the fixed-step rk4.)
    s_augmented = np.append(s, torque)
    ns = rk4(self._dsdt, s_augmented, [0, self.dt])[-1][:4]
    # Normalize angles and clip angular velocities to their limits.
    ns[0] = wrap(ns[0], -np.pi, np.pi)
    ns[1] = wrap(ns[1], -np.pi, np.pi)
    ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
    ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
    self.prev_states.append(s)
    ns = self.augment_state(ns)
    self.state = ns.copy()
    terminal = self.isTerminal()
    if not terminal:
        # GOAL FUNCTION: consume every subgoal the new state satisfies.
        # Guard len(...) first so we never index an empty goalArray
        # (the previous code read goalArray[0] unconditionally).
        while (len(self.goalArray) > 0 and not self.isTerminal()
               and self.goalfn(ns[:4], self.goalArray[0])):
            self.goalArray = self.goalArray[1:]
            print("Goal reached", len(self.prev_states), self.goalArray,
                  self.encodingFunction(self.prev_states))
    # Default reward setup; a custom reward function (when provided)
    # overrides the per-step reward on non-terminal transitions.
    reward = self.STEP_REWARD if not terminal else self.GOAL_REWARD
    if not terminal and self.rewardFunction is not None:
        reward = self.rewardFunction(self.prev_states, self.goalArray,
                                     self.STEP_REWARD, self.GOAL_REWARD)
    return reward, ns, terminal, self.possibleActions()
def step(self, a):
    """Integrate the swimmer dynamics one dt under action index *a*.

    Returns (reward, state, terminal, possible_actions).
    """
    d = self.d
    control = self.actions[a]
    # Pack the full continuous state: CoM position, joint angles,
    # CoM velocity, joint angular velocities.
    packed = np.hstack((self.pos_cm, self.theta, self.v_cm, self.dtheta))
    trajectory = rk4(dsdt, packed, [0, self.dt], control, self.P,
                     self.inertia, self.G, self.U, self.lengths,
                     self.masses, self.k1, self.k2)
    # Only the final timestep of the integration is kept.
    final = trajectory[-1]
    # Unpack the integrated vector back into its named components.
    self.pos_cm = final[:2]
    self.theta = final[2:2 + d]
    self.v_cm = final[2 + d:4 + d]
    self.dtheta = final[4 + d:]
    return (self._reward(control), self.state, self.isTerminal(),
            self.possibleActions())
def step(self, a):
    """Step the swimmer forward by one dt using action index *a*.

    Returns (reward, state, terminal, possible_actions).
    """
    n = self.d
    act = self.actions[a]
    # Flatten the continuous state (CoM position, angles, CoM velocity,
    # angular velocities) into a single vector for the integrator.
    state_vec = np.hstack((self.pos_cm, self.theta, self.v_cm, self.dtheta))
    integrated = rk4(dsdt, state_vec, [0, self.dt], act, self.P,
                     self.inertia, self.G, self.U, self.lengths,
                     self.masses, self.k1, self.k2)[-1]
    # Scatter the result back into the named state components.
    self.pos_cm, self.theta, self.v_cm, self.dtheta = (
        integrated[:2],
        integrated[2:2 + n],
        integrated[2 + n:4 + n],
        integrated[4 + n:],
    )
    reward = self._reward(act)
    return reward, self.state, self.isTerminal(), self.possibleActions()