Example #1
    def step(self, action):
        done = False
        action = np.asarray(action).squeeze()

        action[0] = np.clip(action[0], -self.max_F, self.max_F)
        action[1] = np.clip(action[1], -self.max_M, self.max_M)

        for _ in range(self.act_hold):
            ns = rk4(self._derivs, action, 0, self.dt, self.state)
            self.state[0] = ns[0]
            self.state[1] = ns[1]
            self.state[2] = wrap(ns[2], -2 * pi, 2 * pi)  # We might not need to wrap ...

            self.state[3] = ns[3]
            self.state[4] = ns[4]
            self.state[5] = ns[5]

        # Did it hit a wall? term is True only if the position falls inside one of
        # the deadzone boxes (checked elementwise so each box is tested as a unit).
        term = np.any((self.state[0] >= self.deadzone_x[:, 0])
                      & (self.state[0] <= self.deadzone_x[:, 1])
                      & (self.state[1] >= self.deadzone_y[:, 0])
                      & (self.state[1] <= self.deadzone_y[:, 1]))

        xpen = -0.01 * np.clip((self.state[0] - self.xtarg)**2, -4, 4)
        ypen = -0.01 * np.clip((self.state[1] - self.ytarg)**2, -4, 4)
        thpen = -0.01 * np.clip((self.state[2] - self.theta_targ)**2, -2, 2)

        reward = xpen + ypen + thpen - 10 * term

        self.cur_step += 1
        if self.cur_step > self.num_steps or term:
            done = True

        return self._get_obs(), reward, done, {}
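
All of these step() methods lean on an rk4 integrator helper that isn't shown in the listing. The sketch below is only a guess at what it does: the argument order (derivs, u, t, dt, state) and the derivs(state, t, u) signature are assumptions inferred from the call sites, and the repo's actual helper may differ.

import numpy as np

def rk4(derivs, u, t, dt, state):
    # One classical fourth-order Runge-Kutta step of size dt.
    # derivs(state, t, u) is assumed to return dstate/dt as an array.
    state = np.asarray(state, dtype=float)
    k1 = np.asarray(derivs(state, t, u))
    k2 = np.asarray(derivs(state + 0.5 * dt * k1, t + 0.5 * dt, u))
    k3 = np.asarray(derivs(state + 0.5 * dt * k2, t + 0.5 * dt, u))
    k4 = np.asarray(derivs(state + dt * k3, t + dt, u))
    return state + (dt / 6.0) * (k1 + 2 * k2 + 2 * k3 + k4)
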
Example #2
    def step(self, action):
        done = False

        # RL algorithms that are aware of the action space won't need this, but things
        # like imitation learning or energy-shaping controllers might try feeding in
        # something above the torque limit
        torque = np.clip(action, -self.TORQUE_MAX, self.TORQUE_MAX)
        # torque = action
        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.np_random.uniform(-self.torque_noise_max, self.torque_noise_max)

        for _ in range(5):
            # ns = rk4(self._derivs, torque, 0, self.dt, self.state)
            ns = euler(self._derivs, torque, 0, self.dt, self.state)

            self.state[0] = wrap(ns[0], -2 * pi, 2 * pi)
            # self.state[0] = ns[0]
            self.state[1] = ns[1]
            # self.state[1] = np.clip(ns[1], -self.X_MAX, self.X_MAX)
            self.state[2] = ns[2]
            self.state[3] = ns[3]

        # self.state[2] = np.clip(ns[2], -self.DTHETA_MAX, self.DTHETA_MAX)
        # self.state[3] = np.clip(ns[3], -self.DX_MAX, self.DX_MAX)

        # Should the reward be something we pass in? I do like to mess with them a lot...

        if (pi - 0.2) < self.state[0] < (pi + 0.2):
            reward = 1.0
        else:
            reward = 0.0
            done = True

        #        if (np.pi - .1 < self.state[0] < np.pi + .1) and (-.1 < np.
        # upright = (np.cos(self.state[0]) + 1) / 2
        # centered = rewards.tolerance(self.state[1], margin=2)
        # centered = (1 + centered) / 2
        # small_control = rewards.tolerance(torque.item(), margin=1,
        #                                   value_at_margin=0,
        #                                   sigmoid='quadratic')
        # small_control = (4 + small_control) / 5
        # small_velocity = rewards.tolerance(self.state[2], margin=5)
        # small_velocity = (1 + small_velocity) / 2
        # reward = upright.mean() * small_control * small_velocity * centered

        self.cur_step += 1

        if self.cur_step > self.num_steps:
            done = True
        elif np.abs(self.state[1]) > self.X_MAX:
            # done = True
            reward -= 5

        return self.state.copy(), reward, done, {}
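
Example #2 swaps rk4 for a cheaper euler helper. Assuming the same call signature as the rk4 sketch above, a single forward-Euler step would look like the snippet below; it is less accurate than RK4 for the same dt, which is presumably why the other examples keep rk4.

import numpy as np

def euler(derivs, u, t, dt, state):
    # One explicit (forward) Euler step: state + dt * f(state, t, u).
    state = np.asarray(state, dtype=float)
    return state + dt * np.asarray(derivs(state, t, u))
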
Example #3
    def step(self, action):
        done = False

        # RL algorithms that are aware of the action space won't need this, but things
        # like imitation learning or energy-shaping controllers might try feeding in
        # something above the torque limit
        # torque = np.clip(action, -self.TORQUE_MAX, self.TORQUE_MAX)
        torque = np.clip(action * 1, -self.TORQUE_MAX, self.TORQUE_MAX)

        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.np_random.uniform(-self.torque_noise_max,
                                             self.torque_noise_max)

        # Randomly apply a perturbation

        push = 0
        if 145 * pi / 180 < self.state[0] < 215 * pi / 180:
            rand = random.random()
            if rand > 0.99:
                push = random.gauss(0, 0.2)

        self.state[0] += push

        # Do the integration update; we step the simulator at a higher rate than the control
        for _ in range(5):
            ns = rk4(self._derivs, torque, 0, self.dt, self.state)
            # ns = euler(self._derivs, torque, 0, self.dt, self.state)

            self.state[0] = wrap(ns[0], -2 * pi, 2 * pi)
            # self.state[0] = ns[0]
            self.state[1] = ns[1]
            # self.state[1] = np.clip(ns[1], -self.X_MAX, self.X_MAX)
            self.state[2] = ns[2]
            self.state[3] = ns[3]

        # self.state[2] = np.clip(ns[2], -self.DTHETA_MAX, self.DTHETA_MAX)
        # self.state[3] = np.clip(ns[3], -self.DX_MAX, self.DX_MAX)

        # Should the reward be something we pass in? I do like to mess with them a lot...
        reward = -np.cos(self.state[0])

        self.cur_step += 1
        if self.cur_step > self.num_steps:
            done = True
        elif np.abs(self.state[1]) > self.X_MAX:
            reward -= 5

        # Return a copy so callers don't alias the internal state array
        return self.state.copy(), reward, done, {}
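
Note that the perturbation in Example #3 is drawn from Python's global random module while the torque noise uses self.np_random, so seeding the environment alone won't make the pushes reproducible. A hypothetical helper (not the author's code) that keeps everything on the seeded generator could look like this; the angle window and push scale are copied from the example.

import numpy as np
from numpy import pi

def sample_push(rng, theta, prob=0.01, scale=0.2):
    # Hypothetical: occasionally return a Gaussian push when theta is within
    # +/-35 degrees of pi, which these examples treat as the upright target.
    # rng is assumed to be the env's seeded np_random generator.
    if 145 * pi / 180 < theta < 215 * pi / 180 and rng.uniform() < prob:
        return rng.normal(0.0, scale)
    return 0.0

Inside step() this would replace the push block with self.state[0] += sample_push(self.np_random, self.state[0]).
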
Example #4
    def step(self, action):
        done = False

        # RL algorithms that are aware of the action space won't need this, but things
        # like imitation learning or energy-shaping controllers might try feeding in
        # something above the torque limit
        torque = np.clip(action, -self.TORQUE_MAX, self.TORQUE_MAX)
        # torque = action
        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.np_random.uniform(-self.torque_noise_max,
                                             self.torque_noise_max)

        for _ in range(5):
            # ns = euler(self._derivs, torque, 0, self.dt, self.state)
            ns = rk4(self._derivs, torque, 0, self.dt, self.state)

            self.state[0] = wrap(ns[0], -pi, pi)
            # self.state[0] = ns[0]
            self.state[1] = ns[1]
            # self.state[1] = np.clip(ns[1], -self.X_MAX, self.X_MAX)
            self.state[2] = ns[2]
            self.state[3] = ns[3]

        # self.state[2] = np.clip(ns[2], -self.DTHETA_MAX, self.DTHETA_MAX)
        # self.state[3] = np.clip(ns[3], -self.DX_MAX, self.DX_MAX)

        # Should the reward be something we pass in? I do like to mess with them a lot...
        # reward = -np.cos(self.state[0]) + 2

        reward = (-5 * np.cos(self.state[0])) + 2

        self.cur_step += 1
        if self.cur_step > self.num_steps:
            done = True
        elif np.abs(self.state[1]) > self.X_MAX:
            done = True

        # Return a copy so callers don't alias the internal state array
        return self.state.copy(), reward, done, {}
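
All four step() methods follow the old gym 4-tuple API (obs, reward, done, info), so a rollout against any of them looks the same. A minimal sketch, assuming env is an instance of one of these classes with the usual reset() and action_space attributes:

def rollout(env, max_steps=1000):
    # Roll out a random policy against the 4-tuple gym step API used above.
    obs = env.reset()
    total_reward, done, steps = 0.0, False, 0
    while not done and steps < max_steps:
        action = env.action_space.sample()  # stand-in for a real policy
        obs, reward, done, info = env.step(action)
        total_reward += reward
        steps += 1
    return total_reward
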
Example #5
    def _get_obs(self):
        obs = self.state.copy()
        obs[0] = wrap(obs[0], self.th1_range[0], self.th1_range[1])
        obs[1] = wrap(obs[1], self.th2_range[0], self.th2_range[1])
        return obs
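
The wrap helper used in _get_obs and in the step() methods above isn't shown either. Below is a minimal sketch of the usual modular wrap into [low, high); the real helper may handle the upper boundary differently.

def wrap(x, low, high):
    # Fold x into the interval [low, high) by modular arithmetic.
    return (x - low) % (high - low) + low

For instance, wrap(3 * pi, -2 * pi, 2 * pi) returns -pi.
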