Example #1
# Assumes: numpy as np, plus the project-level Model class and WEIGHTS_PATH.
class Predictor():
    def __init__(self, c_o, c_g, c_d, time_steps):
        self.T = time_steps  # time_steps to predict
        self.model = Model(3, 2, 100, 64, 64, 4, load_data=False)
        self.model.load_weights(WEIGHTS_PATH)
        # self.model = model
        self.trajectory_length = self.model.env_time_step
        self.input_sequence_len = self.model.time_steps
        self.relative_state = np.zeros(
            [self.trajectory_length + self.T, 7]
        )  # all relative to the object frame: (delta_x, delta_y, delta_theta, x_robot, y_robot, action_x, action_y)
        self.absolute_state = np.zeros(
            [self.trajectory_length + self.T, 5]
        )  # all absolute positions: (x_object, y_object, theta_object, x_robot, y_robot)

        self.object_absolute_trajectory_imagine = []  # imagined (shooting) object trajectory
        self.robot_absolute_trajectory_imagine = []  # imagined robot trajectory

        self.obstacle_position = np.zeros([2])  # obstacle absolute position
        self.goal_position = np.zeros(
            [3]
        )  # goal position and orientation in world coordinates
        self.count = 0  # reset on every catch_up; counts how many states have been predicted

        self.c_o = c_o  # cost coefficient for obstacle distance
        self.c_g = c_g  # cost coefficient for goal distance
        self.c_d = c_d  # cost coefficient for distance between object and robot

class MPPI():
    def __init__(self, env, K, T):
        self.model = Model(3, 2, 100, 64, 64, 10, load_data=False)
        self.model.load_weights(WEIGHTS_PATH)
        self.env = env
        self.T = T  # time steps per sequence

        # arguments used to build Predictor instances in the worker processes;
        # the constants are presumably the cost coefficients (c_o, c_g, c_d)
        self.predictor_args = [self.model, 1, 3, 1, self.T]

        self.trajectory = Trajectory(self.env)
        self.K = K  # K sample action sequences
        # self.T = T # time steps per sequence
        self.lambd = 1  # MPPI temperature

        # self.dim_u = self.env.action_space.sample().shape[0]
        self.dim_u = 2
        self.U = np.zeros([self.T, self.dim_u])
        self.base_act = np.zeros([self.T, self.dim_u])
        self.time_limit = self.env._max_episode_steps

        self.u_init = np.zeros([self.dim_u])
        self.cost = np.zeros([self.K])
        self.noise = np.random.normal(loc=0,
                                      scale=1,
                                      size=(self.K, self.T, self.dim_u))
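
These constructors belong to the Predictor and MPPI classes shown in full in Example #3. The bookkeeping they set up is the (K, T, dim_u) noise tensor, the per-sample cost vector, and the nominal plan U. A minimal self-contained sketch (toy sizes, plain NumPy, mirroring the update performed in MPPI.rollout() in Example #3) of how these pieces combine:

import numpy as np

K, T, dim_u = 8, 5, 2   # toy sizes: sample count, horizon, action dimension
lambd = 1.0             # MPPI temperature

U = np.zeros([T, dim_u])                      # nominal action plan
noise = np.random.normal(size=(K, T, dim_u))  # one perturbation sequence per sample
cost = np.random.rand(K)                      # stand-in for the K rollout costs

# softmin weights over the K samples; subtracting beta keeps the exponentials bounded
beta = np.min(cost)
eta = np.sum(np.exp((-1 / lambd) * (cost - beta))) + 1e-6
w = (1 / eta) * np.exp((-1 / lambd) * (cost - beta))

# add the cost-weighted average perturbation to the nominal plan
U += np.array([np.dot(w, noise[:, t]) for t in range(T)])

# execute U[0], then shift the plan left and re-seed the last step
U = np.roll(U, -1, axis=0)
U[-1] = np.zeros([dim_u])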
Example #3
# Assumes: numpy as np, multiprocessing as mp, plus the project-level Model,
# Trajectory, WEIGHTS_PATH, compute_cost_parallel, score_trajectory, and
# stack_noise.
class Predictor():
    """Wraps the learned forward model and keeps a "shadow" copy of the
    episode state: catch_up() syncs it with the real trajectory, then repeated
    predict() calls roll it forward one imagined step at a time, each
    returning the cost of the imagined next state."""
    def __init__(self, c_o, c_g, c_d, time_steps):
        self.T = time_steps  # time_steps to predict
        self.model = Model(3, 2, 100, 64, 64, 4, load_data=False)
        self.model.load_weights(WEIGHTS_PATH)
        # self.model = model
        self.trajectory_length = self.model.env_time_step
        self.input_sequence_len = self.model.time_steps
        self.relative_state = np.zeros(
            [self.trajectory_length + self.T, 7]
        )  # all relative to the object frame: (delta_x, delta_y, delta_theta, x_robot, y_robot, action_x, action_y)
        self.absolute_state = np.zeros(
            [self.trajectory_length + self.T, 5]
        )  # all absolute positions: (x_object, y_object, theta_object, x_robot, y_robot)
        self.obstacle_position = np.zeros([2])  # obstacle absolute position
        self.goal_position = np.zeros(
            [3]
        )  # goal position and orientation in world coordinates
        self.count = 0  # reset on every catch_up; counts how many states have been predicted

        self.c_o = c_o  # cost coefficient for obstacle distance
        self.c_g = c_g  # cost coefficient for goal distance
        self.c_d = c_d  # cost coefficient for distance between object and robot

    def catch_up(self, relative_state, absolute_state, obstacle_position,
                 goal_position, step):
        """update the current state and trajectory history of this episode for this sample agent
        Args:
            relative_state: np.array(step + 1, 7)
            absolute_state: np.array(step + 1, 5)
            obstacle_position: np.array(2,)
            step: (int) timestep
        """
        assert relative_state.shape == (step + 1, 7)
        assert absolute_state.shape == (step + 1, 5)
        assert obstacle_position.shape == (2, )
        assert goal_position.shape == (3, )

        # relative state (model input); the action entries of
        # relative_state[step] are placeholder zeros until predict() fills them
        self.relative_state[:(step + 1)] = relative_state[:]
        self.relative_state[(step + 1):] = np.zeros([7])

        # absolute state (for cost_fun)
        self.absolute_state[:(step + 1)] = absolute_state[:]
        self.absolute_state[(step + 1):] = np.zeros([5])

        # obstacle position and goal position
        self.goal_position[:] = goal_position[:]
        self.obstacle_position[:] = obstacle_position[:]

        # reset the count of predicted (imagined) states
        self.count = 0

    def get_relative_action(self, action, step):
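        # rotate the world-frame action into the object frame (a rotation by
        # -theta, where theta is the object's absolute heading)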
        object_pos = self.absolute_state[step + self.count][:3]
        theta = object_pos[2]
        action_x = action[0]
        action_y = action[1]
        action_relative_to_object_x = action_x * np.cos(
            theta) + action_y * np.sin(theta)
        action_relative_to_object_y = action_y * np.cos(
            theta) - action_x * np.sin(theta)
        return np.array(
            [action_relative_to_object_x, action_relative_to_object_y])

    def predict(self, action, step):
        """action: relative to world coordinate"""
        assert action.shape == (2, )
        # print('action: ', action)
        action = 0.05 * np.clip(action, -1, 1)
        model_input = np.zeros([self.input_sequence_len, 7])

        # transform the action into the object frame
        relative_action = self.get_relative_action(action, step)

        # fill in the action entries of the current state, which catch_up left as (0., 0.)
        # print('predictor predict ', step)
        self.relative_state[step + self.count][5:] = relative_action[:]

        # build the model input window: the model needs the last
        # self.input_sequence_len states; steps before the episode start are
        # zero-padded, keeping the initial relative robot position
        for i in range(self.input_sequence_len):
            idx = i + step + self.count + 1 - self.input_sequence_len
            if idx < 0:
                model_input[i] = np.zeros([7])
                model_input[i][3:5] = self.relative_state[0][3:5]
            else:
                model_input[i] = self.relative_state[idx]

        model_input = model_input[np.newaxis, :]
        state_increment = self.model.predict(
            model_input)  # [delta_x, delta_y, delta_theta] in the object frame
        # state_increment = np.zeros([3])

        # update self.relative_state and self.count
        self.relative_state[step + self.count + 1][:3] = state_increment[:]
        # print('step: ', step, 'count:', self.count)
        self.relative_state[step + self.count +
                            1][3:5] = self.get_prediction_relative_position(
                                state_increment, step)[:]

        # update self.absolute_state
        self.absolute_state[step + self.count +
                            1][:3] = self.get_prediction_absolute_position(
                                state_increment, step)[:]
        self.absolute_state[
            step + self.count +
            1][3:] = self.absolute_state[step + self.count][3:] + action

        # compute the cost
        # print('step: ', step)
        # print('current robot pos: ', self.absolute_state[step][3:])
        cost = self.cost_fun(self.absolute_state[step + self.count + 1][:3],
                             self.absolute_state[step + self.count + 1][3:],
                             self.obstacle_position, self.goal_position)

        # update count
        self.count += 1
        return cost
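
    # Typical use: one catch_up() per real step, then T predict() calls (one
    # per imagined step) whose costs the controller accumulates; see
    # MPPI.compute_cost below.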

    def test(self):
        self.model.model.summary()

    def cost_fun(self, object_position, robot_position, obstacle_position,
                 goal_position):
        c_g = self.c_g
        c_d = self.c_d
        object_position_ = object_position[:2]
        # note: self.c_o, obstacle_position, and the object rotation are
        # currently unused; the cost penalizes only goal distance and
        # robot-object distance
        # print('current robot pos: ', robot_position)

        # quadratic cost: squared distance from object to goal plus a
        # down-weighted squared distance between object and robot
        cost = c_g * np.squeeze(
            np.sum(np.square(object_position_ - goal_position[:2]))
        ) + 0.3 * c_d * np.squeeze(
            np.sum(np.square(object_position_ - robot_position)))
        # cost = c_d*np.squeeze(np.sum(np.square(goal_position[:2] - robot_position)))
        # print('distance: ', cost)
        return cost

    def get_prediction_relative_position(self, state_increment, step):
        """robot position relative to object"""
        x_original = self.relative_state[step + self.count][3]
        y_original = self.relative_state[step + self.count][4]
        action_x = self.relative_state[step + self.count][5]
        action_y = self.relative_state[step + self.count][6]

        coordinate_increment_x = state_increment[0]
        coordinate_increment_y = state_increment[1]
        coordinate_increment_theta = state_increment[2]
        # translation, before applying the predicted rotation
        x = x_original + action_x - coordinate_increment_x
        y = y_original + action_y - coordinate_increment_y
        # rotation
        x_relative_update = x * np.cos(
            coordinate_increment_theta) + y * np.sin(
                coordinate_increment_theta)
        y_relative_update = y * np.cos(
            coordinate_increment_theta) - x * np.sin(
                coordinate_increment_theta)

        return np.array([x_relative_update, y_relative_update])

    def get_prediction_absolute_position(self, state_increment, step):
        object_x_original = self.absolute_state[step + self.count][0]
        object_y_original = self.absolute_state[step + self.count][1]
        object_theta_original = self.absolute_state[step + self.count][2]

        increment_x = state_increment[0]
        increment_y = state_increment[1]
        increment_theta = state_increment[2]

        # rotate the object-frame increment into the world frame by the
        # object's current absolute heading
        delta_x = increment_x * np.cos(
            object_theta_original) - increment_y * np.sin(
                object_theta_original)
        delta_y = increment_x * np.sin(
            object_theta_original) + increment_y * np.cos(
                object_theta_original)

        x_absolute = delta_x + object_x_original
        y_absolute = delta_y + object_y_original
        theta_absolute = object_theta_original + increment_theta

        return np.array([x_absolute, y_absolute, theta_absolute])

    def _get_obs(self):
        return 0  # placeholder; not used elsewhere in this example
class MPPI():
    """MPPI algorithm for pushing """
    def __init__(self, env, K, T):
        self.model = Model(3, 2, 100, 64, 64, 10, load_data=False)
        self.model.load_weights(WEIGHTS_PATH)
        self.env = env
        self.T = T  # time steps per sequence

        # arguments used to build Predictor instances in the worker processes;
        # the constants are presumably the cost coefficients (c_o, c_g, c_d)
        self.predictor_args = [self.model, 1, 3, 1, self.T]

        self.trajectory = Trajectory(self.env)
        self.K = K  # K sample action sequences
        # self.T = T # time steps per sequence
        self.lambd = 1  # MPPI temperature

        # self.dim_u = self.env.action_space.sample().shape[0]
        self.dim_u = 2
        self.U = np.zeros([self.T, self.dim_u])
        self.base_act = np.zeros([self.T, self.dim_u])
        self.time_limit = self.env._max_episode_steps

        self.u_init = np.zeros([self.dim_u])
        self.cost = np.zeros([self.K])
        self.noise = np.random.normal(loc=0,
                                      scale=1,
                                      size=(self.K, self.T, self.dim_u))

    def compute_cost(self, k, step):
        # serial cost evaluation for sample k; assumes a self.predictor built
        # from self.predictor_args (rollout below uses the parallel path instead)
        self.noise[k] = np.random.normal(loc=0,
                                         scale=1,
                                         size=(self.T, self.dim_u))
        self.predictor.catch_up(
            self.trajectory.get_relative_state(),
            self.trajectory.get_absolute_state(),
            self.trajectory.get_obstacle_position(),
            self.trajectory.get_goal_position(), step
        )  # sync the predictor's shadow state with the actual robot and object state
        for t in range(self.T):
            action = self.U[t] + self.noise[k][t]
            cost = self.predictor.predict(
                action, step)  # advances the predictor's shadow state one imagined step
            self.cost[k] += cost
        return self.cost[k]

    def mppi_compute_cost_parallel(self, predictor_args, initial_state,
                                   rollout_num_per_cpu, base_act, step):
        return compute_cost_parallel(predictor_args, initial_state,
                                     rollout_num_per_cpu, base_act, step)

    def rollout(self, num_episodes):
        for episode in range(num_episodes):
            print('episode: {}'.format(episode))
            obs = self.env.reset()
            self.trajectory.reset()
            self.trajectory.update_state(obs)
            self.U = np.zeros([self.T, self.dim_u])
            # rollout_num_per_cpu = 1
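            # split the K samples evenly across the available CPU cores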
            rollout_num_per_cpu = self.K // mp.cpu_count()

            for step in np.arange(self.time_limit):
                # env render
                self.env.render()
                print('step: ', step + 1)
                # update current_state
                current_state = [
                    self.trajectory.get_relative_state(),
                    self.trajectory.get_absolute_state(),
                    self.trajectory.get_obstacle_position(),
                    self.trajectory.get_goal_position(), step
                ]
                # compute costs for K sample in multiprocess
                paths = self.mppi_compute_cost_parallel(
                    self.predictor_args, current_state, rollout_num_per_cpu,
                    self.U, step
                )  # list of K elements, each element is a dict{observation: (self.T, dim_obs), actions: (self.T, 2), costs: (self.T,)}
                # get cost from paths
                self.cost = score_trajectory(paths)
                # get actions from paths
                self.noise = stack_noise(paths)

                # MPPI update: softmin-weight each sampled noise sequence by
                # its total cost (beta and the 1e-6 keep the exponentials stable)
                beta = np.min(self.cost)
                eta = np.sum(np.exp(
                    (-1 / self.lambd) * (self.cost - beta))) + 1e-6
                w = (1 / eta) * np.exp((-1 / self.lambd) * (self.cost - beta))

                # add the cost-weighted average noise, one time step at a time
                self.U += [np.dot(w, self.noise[:, t]) for t in range(self.T)]
                # execute only the first planned action, zero-padded to the
                # env's 4-dim action
                obs, r, _, _ = self.env.step(
                    np.concatenate([self.U[0], np.zeros([2])]))

                # update obs and action for trajectory
                self.trajectory.update_state(obs)
                self.trajectory.update_action(self.U[0])
                # self.env.render()
                # print('step: ', step)
                self.U = np.roll(self.U, -1, axis=0)  # shift the plan left
                self.U[-1] = self.u_init
                self.cost = np.zeros([self.K])  # reset cost
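
A minimal wiring sketch for the two classes above, assuming a gym-style pushing environment; make_push_env is a hypothetical stand-in for however the project constructs its env:

import multiprocessing as mp

env = make_push_env()                         # hypothetical env factory
mppi = MPPI(env, K=mp.cpu_count() * 4, T=10)  # K sample sequences, horizon T
mppi.rollout(5)                               # five closed-loop episodes

Picking K as a multiple of mp.cpu_count() keeps rollout_num_per_cpu = K // mp.cpu_count() an even split, so no sampled sequences are silently dropped.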
Example #5
# Assumes: numpy as np, os, plus the project-level Model, WEIGHTS_PATH,
# DATA_PATH, SAVED_DATA_PATH, and get_object_state_increment_world_coordinate.
def main():
    model = Model(3, 2, 60, 64, 64, 10)
    env_step = model.env_time_step
    time_sequence = model.time_steps
    model.model.load_weights(WEIGHTS_PATH)

    test_robot_data_raw = np.load(
        os.path.join(DATA_PATH, 'robot_pure_test.npy')).reshape(-1, 6)
    test_object_data_raw = np.load(
        os.path.join(DATA_PATH, 'object_pure_test.npy')).reshape(-1, 13)

    test_object_position = test_object_data_raw[:, :2]
    test_object_rotation = (np.arccos(test_object_data_raw[:, 3]) * 2).reshape(
        -1, 1)
    test_object_state = np.concatenate(
        (test_object_position, test_object_rotation),
        axis=1).reshape(-1, env_step, 3)  # [episode_num, env_step, 3]

    test_robot_state = test_robot_data_raw[:, :2].reshape(
        -1, env_step, 2)  # [episode_num, env_step, 2]

    # test_data_set = model.data_preprocess(test_object_data_raw, test_robot_data_raw)

    # predict = model.model.predict(test_data_set['input'])
    # error = predict - target

    #### generate predicted trajectories ####
    episode_num = len(test_robot_state)
    ### initialize ###
    predict_trajectory = np.zeros([episode_num, env_step, 3])
    original_trajectory = np.zeros([episode_num, env_step, 3])

    original_data_set = model.data_preprocess(test_object_data_raw,
                                              test_robot_data_raw)
    original_output = original_data_set['target']
    original_output = original_output.reshape(episode_num, -1, time_sequence,
                                              3)
    assert original_output.shape == (episode_num,
                                     (env_step - 1) // time_sequence,
                                     time_sequence, 3)

    #### loop ####
    for episode in range(episode_num):
        trajectory_p = np.zeros(
            [env_step, 3]
        )  # the first time_sequence entries are copied from ground truth, since the model needs that much history before it can predict
        # trajectory_o = np.zeros([env_step, 3])
        trajectory_p[:time_sequence] = test_object_state[
            episode][:time_sequence]
        # trajectory_o[:time_sequence] = test_object_state[episode][:time_sequence]

        robot_state = test_robot_state[episode]
        # print(robot_state.shape)
        for step in range(time_sequence, env_step):
            trajectory_p[step] = get_object_state_increment_world_coordinate(
                model, step, time_sequence, trajectory_p,
                robot_state) + trajectory_p[step - 1]
            # trajectory_o[step] = get_object_state_increment_world_coordinate_original(step, time_sequence, episode, original_output, trajectory_o) + trajectory_o[step - 1]

        predict_trajectory[episode] = trajectory_p
        # original_trajectory[episode] = trajectory_o

        template = 'Generated {} episodes.'
        print(template.format(episode + 1))

    # print(predict_trajectory.shape)
    np.save(os.path.join(SAVED_DATA_PATH, 'pure_test'), predict_trajectory)
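
The commented-out error computation above hints at the intended evaluation. A short follow-up that could be appended to the end of main() (a sketch; it reuses the arrays computed above):

    # per-episode open-loop MSE against ground truth; the first time_sequence
    # steps are copied from the targets, so their error is zero by construction
    error = predict_trajectory - test_object_state
    mse_per_episode = np.mean(np.square(error), axis=(1, 2))
    print('mean open-loop MSE over {} episodes: {:.6f}'.format(
        episode_num, mse_per_episode.mean()))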