Example 1
def update():
    global curve, plot_container, data_storage, foot_locations, tick

    horizontal_velocity = (0.2, 0.0)
    yaw_rate = 0.0
    target_height = -0.16
    current_height = target_height
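    # advance the gait by one tick to get the next set of foot target positions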
    foot_locations = controller.step_gait(foot_locations,
                                          np.array(horizontal_velocity),
                                          yaw_rate, target_height,
                                          current_height, tick)
    tick += 1
    joints = ik(foot_locations, quat)  # replace with current body orientation

    # foot_history.append(np.copy(foot_locations.T))
    data_storage.append(foot_locations.T)
    data_arr = np.array(data_storage)

    # sim.action(controller_to_sim(joints))
    sim.set(controller_to_sim(joints))

    for _ in range(substeps):
        sim.step()

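    # plot the x-z trajectory traced so far by each of the four feet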
    foot1.setData(x=data_arr[:, 0, 0], y=data_arr[:, 0, 2])
    foot2.setData(x=data_arr[:, 1, 0], y=data_arr[:, 1, 2])
    foot3.setData(x=data_arr[:, 2, 0], y=data_arr[:, 2, 2])
    foot4.setData(x=data_arr[:, 3, 0], y=data_arr[:, 3, 2])
Example 2
    def step(self, action):
        action_clean = self.sanitize_actions(action)

        pos_before, _, _ = self.sim.get_pos_orn_vel()

        # The action command only sets the goals of the motors. It doesn't actually step the simulation forward in
        # time. Instead of feeding the simulator the action directly, we take the mean of the last N actions,
        # where N comes from the action_smoothing hyper-parameter
        self.action_smoothing.append(action_clean)
        action_agent = np.mean(self.action_smoothing, axis=0)
        action_gait = controller_to_sim(
            self.gait.act(velocity_horizontal=(0.2, 0), normalized=False))
        action = self.gait_factor * action_gait + (
            1 - self.gait_factor) * action_agent

        # let's clip again just to be safe and within the boundaries of the expert
        action = np.clip(
            action,
            np.max((self.joints_hard_limit_lower,
                    action_gait + 0.2 * self.joints_hard_limit_lower),
                   axis=0),
            np.min((self.joints_hard_limit_uppper,
                    action_gait + 0.2 * self.joints_hard_limit_uppper),
                   axis=0),
        )

        self.sim.action(action)

        for _ in range(self.sim_steps):
            self.sim.step()

        pos_after, orn_after, _ = self.sim.get_pos_orn_vel()

        obs = self.get_obs()

        # this reward calculation is taken verbatim from halfcheetah-v2, save for the stability term below
        reward_ctrl = -np.square(action).sum()
        reward_run = (pos_after[0] - pos_before[0]) / self.dt
        # technically we should divide the next line by (3*pi^2), but that maximum is practically never reached
        reward_stable = -np.square(orn_after).sum() / np.square(np.pi)
        reward = self.rcoeff_ctrl * reward_ctrl + self.rcoeff_run * reward_run + self.rcoeff_stable * reward_stable

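        # episode termination: fixed horizon, plus an optional early stop (with a penalty) when the robot flips over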
        done = False
        self.episode_steps += 1
        if self.episode_steps == self.episode_steps_max:
            done = True

        if self.stop_on_flip:
            flipped = self.sim.check_collision()
            if flipped:
                done = True
                reward -= 1000

        return obs, reward, done, dict(reward_run=reward_run,
                                       reward_ctrl=reward_ctrl,
                                       reward_stable=reward_stable)
Example 3
    def step(self, action):

        self.start_step()

        self.old_pos, _, _ = self.sim.get_pos_orn_vel()

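        # map the raw agent action to foot positions, then solve IK for the joint targets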
        foot_position = self._get_action(action)
        joints = ik(foot_position.T,
                    self.quat)  # replace with current body orientation
        # joints = ik(action, self.quat)
        self.sim.action(controller_to_sim(joints))

        for _ in range(self.sim_steps):
            self.sim.step()

        self.episode_steps += 1

        # return self._get_obs(), self._get_reward(action), self._get_done(), self._get_info()
        return self._get_obs(), 0, self._get_done(), self._get_info()
Example 4
    def sanitize_actions(self, actions):
        assert len(actions) == 12
        actions_clipped = (np.clip(actions, -1, 1).astype(np.float32) + 1) / 2

        # denormalize actions to correspond to foot offset
        actions_denorm = actions_clipped * self.foot_ranges_diff + self.foot_ranges[:, 0]

        # step the gait to get the current foot positions
        self.gait.act(velocity_horizontal=(0.2, 0), normalized=False)

        # combine the gait with the offset, half-half
        actions_merged = 0.5 * actions_denorm + 0.5 * self.gait.state.foot_locations.T.flatten()

        # make sure feet don't exceed the foot max range
        clipped_feet = np.clip(actions_merged, self.foot_ranges[:, 0], self.foot_ranges[:, 1])

        # reshape and apply inverse kinematics to get joints from feet
        clipped_feet = clipped_feet.reshape(4, 3).T
        joints = self.gait.controller.inverse_kinematics(clipped_feet, self.gait.controller.config)

        # convert joints from 3,4 repr to sim-usable one
        joints = controller_to_sim(joints)

        return joints
Example 5
    def _get_action(self, action):
        """
        We do not use the raw action from the agent directly; the raw action values need some processing first.
        :param action: raw action
        :return: processed action
        """
        action_clean = self.sanitize_actions(action)
        # The action command only sets the goals of the motors. It doesn't actually step the simulation forward in
        # time. Instead of feeding the simulator the action directly, we take the mean of the last N actions,
        # where N comes from the action_smoothing hyper-parameter
        self.action_smoothing.append(action_clean)
        action_agent = np.mean(self.action_smoothing, axis=0)

        # TODO: Verify this: there is now an array of 2 and the leg actions are in the first element,
        #  so I added [0] (maybe because of an update of the gait for the arm actions?)
        action_gait = controller_to_sim(self.gait.act(velocity_horizontal=(0.2, 0), normalized=False)[0])
        action = self.gait_factor * action_gait + (1 - self.gait_factor) * action_agent

        # let's clip again just to be safe and within the boundaries of the expert
        return np.clip(
            action,
            np.max((self.joints_hard_limit_lower, action_gait + 0.2 * self.joints_hard_limit_lower), axis=0),
            np.min((self.joints_hard_limit_uppper, action_gait + 0.2 * self.joints_hard_limit_uppper), axis=0),
        )
Example 6
import time

import gym
import gym_pupper
from gym_pupper.common.Utilities import controller_to_sim
from gym_pupper.pupper.policy import HardPolicy

ROLLOUTS = 5
env = gym.make(
    "Pupper-Walk-Absolute-aScale_1.0-gFact_0.0-RandomZRot_1-Graphical-v0")

for run in range(ROLLOUTS):
    state = env.reset()
    policy = HardPolicy()
    while True:
        action = controller_to_sim(
            policy.act(velocity_horizontal=(0.2, 0), normalized=True))
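        # NOTE: the print/quit below dump the first action and exit, so the rollout loop never actually steps the env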
        print(action)
        quit()
        next_state, rew, done, misc = env.step(action)
        if done:
            break
        time.sleep(1 / 60)
Example 7
    #     txt = "arm up"
    # elif 60 * 41 < i <= 60 * 47:
    #     vel_arm = (0, 0.0001, -0.001)
    #     txt = "arm down"
    # else:
    #     quit()

    # if 60 * 29 < i:
    #     vel_body = (idle_walk_fw, 0)

    action, action_arm = policy.act(velocity_horizontal=(0.2, 0),
                                    yaw_rate=0.8,
                                    normalized=True,
                                    velocity_arm=(0, 0, 0))
    # print(action_arm)
    action = controller_to_sim(action) * np.pi  # denormalize
    arm_angles.append(action_arm)
    # print(i // 60, policy.state.arm_pos, action_arm)
    sim.action(action)
    sim.action_arm(action_arm)
    for _ in range(substeps):
        sim.step()
        # time.sleep(0.01)

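    # capture a camera frame every other control tick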
    if i % 2 == 0:
        img, _ = sim.take_photo(follow_bot=False,
                                camera_offset=(0.5, 0.4, 0.4),
                                lookat_offset=(0.5, 0, 0))
        font = cv2.FONT_HERSHEY_SIMPLEX
        img = np.array(img)
        # cv2.putText(img, txt, (200, 200), font, 1, (255, 106, 213), 2, cv2.LINE_AA)
Example 8
    config = SimpleConfig(dt=1 / FPS)
    controller = SimpleController(config)
    foot_locations = DEFAULT_STANCE + np.array([0, 0, DEFAULT_Z_REF
                                                ])[:, np.newaxis]

    sim = PupperSim2(with_arm=False,
                     frequency=240,
                     debug=True,
                     img_size=(512, 256))
    # sim.make_kinematic((0.588, 0.419, 1, 1))  # use with sim.set(), otherwise use sim.action()
    sim.reset()
    # sim.action_arm([0, 0, 0])
    # sim.change_color((0.588, 0.419, 1, 1))

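    # run the gait open-loop: step the gait, solve IK for joint angles, and send them to the sim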
    for tick in trange(steps):
        horizontal_velocity = (0.2, 0.0)
        yaw_rate = 0.0
        target_height = -0.16
        current_height = target_height
        foot_locations = controller.step_gait(foot_locations,
                                              np.array(horizontal_velocity),
                                              yaw_rate, target_height,
                                              current_height, tick)
        joints = ik(foot_locations,
                    quat)  # replace with current body orientation
        sim.action(controller_to_sim(joints))
        # sim.set(controller_to_sim(joints))

        for _ in range(substeps):
            sim.step()
Example 9
policy = HardPolicy()
last_reset_val = -1
last_plot_val = -1
joints = []
counter = 0

while True:
    vel = [0, 0]
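    # debug GUI parameters: 0-1 set the x/y velocity, 2 triggers a sim reset, 3 triggers a joint plot (both fire when their value changes)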
    for param_idx, param in enumerate(debug_params):
        val = sim.p.readUserDebugParameter(param)
        if param_idx < 2:
            vel[param_idx] = val
        if param_idx == 2 and val != last_reset_val:
            reset_sim()
            last_reset_val = val
        if param_idx == 3 and val != last_plot_val:
            if len(joints) > 0:
                plot_joints(joints)
            last_plot_val = val
            joints.clear()

    cmd, _ = policy.act(velocity_horizontal=vel)
    action = controller_to_sim(cmd)
    joints.append(np.copy(action))
    sim.action(action)

    for _ in range(substeps):
        sim.step()

    time.sleep(1 / FPS)
Example 10
from gym_pupper.pupper.policy import HardPolicy

FREQ_CTRL = 60
TEST_BODY = False
TEST_ARM = True

policy = HardPolicy(fps=FREQ_CTRL)


########### BODY
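# time how fast the hard-coded gait policy can produce leg actions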
if TEST_BODY:
    iterations = 10_000
    times = 0
    for _ in range(iterations):
        start = time.time()
        _ = controller_to_sim(policy.act(velocity_horizontal=(0.2, 0), yaw_rate=1.0, normalized=True)) * np.pi
        diff = time.time() - start
        times += diff

    print(
        f"BODY: ran {iterations} iteractions at {np.around(iterations/times,1)}Hz, "
        f"so avg of {times/iterations}s per iterations"
    )
    # MBP 2018: ran 100000 iterations at 1749.8Hz, so avg of 0.000571490352153778s per iteration

########### ARM
if TEST_ARM:
    iterations = 100
    times = 0
    for _ in range(iterations):
        start = time.time()
Example 11
import numpy as np
from scipy import fftpack
from tqdm import trange
import matplotlib.pyplot as plt
from gym_pupper.common.Utilities import controller_to_sim
from gym_pupper.pupper.policy import HardPolicy

policy = HardPolicy()
steps = 2000
joints = []
feet = []
for _ in trange(steps):
    joints_legs, _ = policy.act(velocity_horizontal=(0.2, 0),
                                yaw_rate=0.0,
                                normalized=False)
    action = controller_to_sim(joints_legs)
    joints.append(action)
    feet.append(policy.state.foot_locations.T)

feet = np.array(feet)
print(feet.shape)

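# scatter each foot's x-z trajectory in its own subplot, colored by time step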
x = np.arange(0, steps)
fig, axs = plt.subplots(2, 2, sharex=True, sharey=True)
for foot in range(4):
    im = axs[foot // 2][foot % 2].scatter(feet[:, foot, 0],
                                          feet[:, foot, 2],
                                          alpha=0.5,
                                          c=x,
                                          cmap="viridis")
    # axs[foot%2][foot//2].legend()