def do_predictor_rollout(predictor_args, initial_state, act_list, noise_list,
                         step):
    """Roll each candidate action sequence through a fresh Predictor.

    act_list: list with num_rollout_per_cpu elements, each element is an
    np.array of shape (H, dim_u); noise_list is shaped the same way.
    Returns one path dict (observations / actions / costs / noise) per
    sequence.
    """
    predictor = Predictor(*predictor_args)
    print('predictor built successfully')
    horizon = act_list[0].shape[0]
    paths = []
    for actions, noises in zip(act_list, noise_list):
        # re-sync the predictor's shadow state before every rollout
        predictor.catch_up(*initial_state)
        obs_seq, act_seq, noise_seq, cost_seq = [], [], [], []
        for t in range(horizon):
            obs_seq.append(predictor._get_obs())
            act_seq.append(actions[t])
            noise_seq.append(noises[t])
            cost_seq.append(predictor.predict(actions[t], step))
        paths.append(dict(observations=np.array(obs_seq),
                          actions=np.array(act_seq),
                          costs=np.array(cost_seq),
                          noise=np.array(noise_seq)))
    return paths
# ---- Example #2 (original listing separator: "Beispiel #2", vote count 0) ----
class MPPI():
    """MPPI (Model Predictive Path Integral) controller for pushing.

    Samples K noisy perturbations of a nominal T-step angle sequence U,
    scores each rollout with the learned Predictor, and folds the
    perturbations back into U via exponentially weighted averaging with
    temperature ``lambd``.
    """
    def __init__(self, K, T, A):
        # K: number of sampled action sequences; T: planning horizon;
        # A: action magnitude scale applied to the (sin, cos) direction.
        self.A = A  # action scale
        self.T = T  # time steps per sequence
        self.predictor = Predictor(1, 2, 1, self.T)
        self.trajectory = Trajectory()
        self.trajectory.reset()
        self.K = K  # K sample action sequences
        self.lambd = 1  # temperature of the exponential weighting

        self.dim_u = 1  # U is theta
        # action init
        self.U_reset()

        self.u_init = np.array([0.0])  # value shifted in by U_update()
        self.cost = np.zeros([self.K])
        # initial perturbations; resampled per rollout in _compute_cost
        self.noise = np.random.uniform(-3.14,
                                       3.14,
                                       size=(self.K, self.T, self.dim_u))

    def _importance_weights(self):
        """Return the K MPPI weights: softmax of -cost / lambd.

        Subtracting the minimum cost (beta) stabilizes the exponent; the
        1e-6 term guards against division by zero.
        """
        beta = np.min(self.cost)
        scaled = np.exp((-1 / self.lambd) * (self.cost - beta))
        eta = np.sum(scaled) + 1e-6
        return scaled / eta

    def _update_U(self):
        """Fold the cost-weighted noise back into the nominal sequence U."""
        w = self._importance_weights()
        self.U += [np.dot(w, self.noise[:, t]) for t in range(self.T)]

    def _theta_to_action(self, theta):
        """Map an angle theta to the scaled action A * [sin(theta), cos(theta)]."""
        return self.A * np.concatenate([np.sin(theta), np.cos(theta)])

    def _compute_cost(self, k, step):
        """Roll out perturbed sequence k through the predictor, accumulating
        the total predicted cost into self.cost[k]."""
        # fresh clipped Gaussian perturbation for this sample
        self.noise[k] = np.clip(
            np.random.normal(loc=0, scale=2, size=(self.T, 1)), -1.57, 1.57)
        eps = self.noise[k]  # view: in-place edits below update self.noise[k]
        self.predictor.catch_up(
            self.trajectory.get_relative_state(),
            self.trajectory.get_absolute_state(),
            self.trajectory.get_obstacle_position(),
            self.trajectory.get_goal_position(), step
        )  # make the shadow state the same as the actual robot and object state
        for t in range(self.T):
            if t > 0:
                # low-pass filter the noise over time for smoother actions
                eps[t] = 0.4 * eps[t - 1] + 0.6 * eps[t]
            theta = self.U[t] + eps[t]
            cost = self.predictor.predict(
                self._theta_to_action(theta),
                step)  # there will be shadow states in predictor
            self.cost[k] += cost

    def compute_cost(self, step):
        """Evaluate all K perturbed sequences at the given step."""
        for k in range(self.K):
            self._compute_cost(k, step)

    def trajectory_clear(self):
        self.trajectory.reset()

    def trajectory_set_goal(self, pos_x, pos_y, theta):
        self.trajectory.set_goal(pos_x, pos_y, theta)

    def trajectory_update_state(self, pose_object, pose_tool):
        self.trajectory.update_state(pose_object, pose_tool)

    def trajectory_update_action(self, action):
        self.trajectory.update_action(action)

    def compute_noise_action(self):
        """Update U from the rollout costs and return the first action."""
        self._update_U()
        return self._theta_to_action(self.U[0])

    def compute_noise_theta(self):
        """Update U from the rollout costs and return the first angle."""
        self._update_U()
        return self.U[0]

    def U_reset(self):
        """Reset the nominal sequence to a constant initial angle (1.57 rad)."""
        self.U = np.zeros([self.T, self.dim_u])
        self.U[:, 0] = 1.57
        # self.U[:, 1] = 0.8

    def get_K(self):
        return self.K

    def U_update(self):
        """Receding horizon: shift U one step left and append u_init."""
        self.U = np.roll(self.U, -1, axis=0)
        self.U[-1] = self.u_init

    def cost_clear(self):
        self.cost = np.zeros([self.K])
# ---- Example #3 (original listing separator: "Beispiel #3", vote count 0) ----
class MPPI():
    """MPPI algorithm for pushing """
    # Environment-driven MPPI variant: rollout() runs full episodes, sampling
    # K perturbed T-step action sequences per environment step, scoring them
    # with the Predictor, and exponentially re-weighting them to update the
    # nominal sequence U.
    def __init__(self, env, K, T):
        # env: gym-style environment (the code uses reset/step/render and
        #   `_max_episode_steps`); K: number of sampled action sequences;
        # T: planning horizon (time steps per sequence).
        self.env = env
        self.T = T  # time steps per sequence
        self.predictor = Predictor(1, 2, 1, self.T)
        # self.predictor = Predictor(1, 1, 2, self.T)
        self.trajectory = Trajectory(self.env)
        self.trajectory.reset()
        self.K = K  # K sample action sequences
        # self.T = T # time steps per sequence
        self.lambd = 1  # temperature of the exponential weighting

        # self.dim_u = self.env.action_space.sample().shape[0]
        self.dim_u = 2  # NOTE(review): hard-coded 2-D action — confirm vs env
        self.U = np.zeros([self.T, self.dim_u])  # nominal action sequence
        self.time_limit = self.env._max_episode_steps

        self.u_init = np.zeros([self.dim_u])  # appended after each left-shift of U
        self.cost = np.zeros([self.K])  # accumulated rollout cost per sample
        self.noise = np.random.normal(loc=0,
                                      scale=1,
                                      size=(self.K, self.T, self.dim_u))

    def compute_cost(self, k, step):
        """Roll out perturbed sequence k through the predictor and add its
        total predicted cost to self.cost[k]."""
        self.noise[k] = np.random.normal(loc=0,
                                         scale=1,
                                         size=(self.T, self.dim_u))
        eps = self.noise[k]  # view: in-place edits below also update self.noise[k]
        # print('noiseK: ', self.noise[k])
        self.predictor.catch_up(
            self.trajectory.get_relative_state(),
            self.trajectory.get_absolute_state(),
            self.trajectory.get_obstacle_position(),
            self.trajectory.get_goal_position(), step
        )  # make the shadow state the same as the actual robot and object state
        for t in range(self.T):
            if t > 0:
                # low-pass filter the noise over time for smoother actions
                eps[t] = 0.8 * eps[t - 1] + 0.2 * eps[t]
            self.noise[k][t] = eps[t]  # redundant: eps already aliases self.noise[k]
            action = self.U[t] + eps[t]
            # print('action: ', action)
            cost = self.predictor.predict(
                action, step)  # there will be shadow states in predictor
            # print('predict step: ', t + 1, 'robot predict position: ', self.predictor.absolute_state[step + t + 1][3:])
            # print('robot actual position current: ', self.predictor.absolute_state[step][3:])
            self.cost[k] += cost

    def rollout(self, episode):
        """Run `episode` full MPPI-controlled episodes in the environment."""
        for episode in range(episode):  # NOTE(review): loop var shadows the parameter
            print('episode: {}'.format(episode))
            obs = self.env.reset()
            self.trajectory.reset()
            self.trajectory.update_state(obs)
            self.U = np.zeros([self.T, self.dim_u])  # restart nominal sequence
            for step in range(self.time_limit):
                print('step: ', step)
                # self.env.render()
                for k in range(self.K):
                    self.compute_cost(k, step)

                # MPPI exponential weighting over the K rollout costs;
                # beta subtraction stabilizes the exponent, 1e-6 guards eta.
                beta = np.min(self.cost)
                eta = np.sum(np.exp(
                    (-1 / self.lambd) * (self.cost - beta))) + 1e-6
                w = (1 / eta) * np.exp((-1 / self.lambd) * (self.cost - beta))

                self.U += [np.dot(w, self.noise[:, t]) for t in range(self.T)]
                # action is padded with two zeros — presumably the env expects
                # a 4-D action vector; confirm against env.step
                obs, r, _, _ = self.env.step(
                    np.concatenate([self.U[0], np.zeros([2])]))
                self.trajectory.update_state(obs)
                self.trajectory.update_action(self.U[0])
                self.env.render()
                # print('step: ', step)
                self.U = np.roll(self.U, -1, axis=0)  #shift left
                self.U[-1] = self.u_init
                self.cost = np.zeros([self.K])  # reset cost