def reset(self):
    self.episode_steps = 0

    # both when the action formulation is incremental and when it's relative, we need to start standing
    self.sim.reset(rest=True)  # also stand up the robot

    # Add elements in our environment
    self._reset_simulation_env()

    # this is used when self.incremental_action == True
    self.current_action = self.sim.get_rest_pos()

    # Our expert policy
    self.gait = HardPolicy()

    return self._get_obs()
import time

import gym

import gym_pupper
from gym_pupper.common.Utilities import controller_to_sim
from gym_pupper.pupper.policy import HardPolicy

ROLLOUTS = 5

env = gym.make("Pupper-Walk-Absolute-aScale_1.0-gFact_0.0-RandomZRot_1-Graphical-v0")

for run in range(ROLLOUTS):
    state = env.reset()
    policy = HardPolicy()
    while True:
        action = controller_to_sim(policy.act(velocity_horizontal=(0.2, 0), normalized=True))
        next_state, rew, done, misc = env.step(action)
        if done:
            break
        time.sleep(1 / 60)
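# The demo above relies on controller_to_sim to bridge the gait controller and
# the simulator. Below is a minimal sketch of what such a conversion could look
# like - an assumption for illustration, not the repo's actual implementation:
# the controller works with a (3, 4) matrix (rows = hip/upper/lower angle, one
# column per leg), while the sim consumes a flat 12-vector ordered leg by leg
# (fr, fl, br, bl), matching the observation-space ordering documented below.
import numpy as np

def controller_to_sim_sketch(joints_3x4):
    # (3, 4) -> transpose to (4, 3) -> flatten to (12,), so each leg's
    # hip/upper/lower angles end up contiguous
    return np.asarray(joints_3x4).T.flatten()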
class FootOffsetEnv(gym.Env):
    def __init__(
        self,
        debug=False,
        steps=120,
        control_freq=CONTROL_FREQUENCY,
        action_scaling=1.0,
        random_rot=(0, 0, 0),
        reward_coefficients=(0.1, 1, 1),
        stop_on_flip=False,
    ):
        """Gym-compatible environment to teach the pupper how to walk - with predefined gait as baseline"""
        super().__init__()

        # observation space:
        # - 12 leg joints in the order
        #   - front right hip
        #   - front right upper leg
        #   - front right lower leg
        #   - front left hip/upper/lower leg
        #   - back right hip/upper/lower leg
        #   - back left hip/upper/lower leg
        # - 3 body orientation in euler angles
        # - 2 linear velocity (only along the plane, we don't care about z velocity)
        self.observation_space = spaces.Box(low=-1, high=1, shape=(12 + 3 + 2,), dtype=np.float32)

        # action = x/y/z offset for all 4 feet
        self.action_space = spaces.Box(low=-1, high=1, shape=(12,), dtype=np.float32)

        # turning off start_standing because that's done in self.reset()
        kwargs = {}
        # if relative_action:  # if this is turned on, the robot is underpowered for the hardcoded gait
        #     kwargs = {"gain_pos": 1 / 16, "gain_vel": 1 / 8, "max_torque": 1 / 2}
        self.sim = PupperSim2(debug=debug, start_standing=False, **kwargs)

        self.episode_steps = 0
        self.episode_steps_max = steps
        self.control_freq = control_freq
        self.dt = 1 / self.control_freq
        self.incremental_angle = np.deg2rad(MAX_ANGLE_PER_SEC) / self.control_freq
        self.sim_steps = int(round(FREQ_SIM / control_freq))
        print(f"Running with {self.sim_steps} substeps")
        self.action_scaling = action_scaling
        self.random_rot = random_rot
        self.stop_on_flip = stop_on_flip
        self.current_action = np.array([0] * 12)
        self.gait = None

        ranges = np.load(f"{ASSET_DIR}/joint_foot_ranges.npz")
        self.foot_ranges = ranges["foot_ranges"].reshape(12, 2)
        self.foot_ranges_diff = self.foot_ranges[:, 1] - self.foot_ranges[:, 0]

        # extend the allowed foot ranges by 25% of the range on that axis
        print("fr before", self.foot_ranges)
        self.foot_ranges = np.array(
            [
                (fr[0] - 0.25 * fd, fr[1] + 0.25 * fd) if fd > 0 else (fr[0] - 0.1, fr[1] + 0.1)
                for fr, fd in zip(self.foot_ranges, self.foot_ranges_diff)
            ]
        )
        self.foot_ranges_diff[self.foot_ranges_diff == 0] = 0.2  # corresponding to min dist
        print("fr after", self.foot_ranges)
        self.joint_ranges = ranges["joint_ranges"]
        print("joint ranges", self.joint_ranges)

        # new reward coefficients
        self.rcoeff_ctrl, self.rcoeff_run, self.rcoeff_stable = reward_coefficients

    def reset(self):
        self.episode_steps = 0
        self.sim.reset(rest=True)  # also stand up the robot
        self.gait = HardPolicy()
        return self.get_obs()

    def seed(self, seed=None):
        np.random.seed(seed)
        return super().seed(seed)

    def close(self):
        self.sim.p.disconnect()
        super().close()

    def sanitize_actions(self, actions):
        assert len(actions) == 12

        actions_clipped = (np.clip(actions, -1, 1).astype(np.float32) + 1) / 2
        # denormalize actions to correspond to foot offsets
        actions_denorm = actions_clipped * self.foot_ranges_diff + self.foot_ranges[:, 0]

        # step the gait to get the foot positions
        self.gait.act(velocity_horizontal=(0.2, 0), normalized=False)

        # combine the gait with the offset, half-half
        actions_merged = 0.5 * actions_denorm + 0.5 * self.gait.state.foot_locations.T.flatten()

        # make sure feet don't exceed the max foot range
        clipped_feet = np.clip(actions_merged, self.foot_ranges[:, 0], self.foot_ranges[:, 1])

        # reshape and apply inverse kinematics to get joints from feet
        clipped_feet = clipped_feet.reshape(4, 3).T
        joints = self.gait.controller.inverse_kinematics(clipped_feet, self.gait.controller.config)

        # convert joints from the (3, 4) repr to a sim-usable one
        joints = controller_to_sim(joints)
        return joints

    def get_obs(self):
        pos, orn, vel = self.sim.get_pos_orn_vel()

        joint_states = np.array(self.sim.get_joint_states())
        # per-joint normalization based on the max useful range
        joint_states = np.array(
            [(js - rngs[0]) / (rngs[1] - rngs[0]) for js, rngs in zip(joint_states, self.joint_ranges)]
        ) / (np.pi * 1.25)

        obs = list(joint_states) + list(orn) + list(vel)[:2]
        return obs

    def step(self, action):
        action_clean = self.sanitize_actions(action)
        pos_before, _, _ = self.sim.get_pos_orn_vel()

        self.sim.action(action_clean)
        for _ in range(self.sim_steps):
            self.sim.step()

        pos_after, orn_after, _ = self.sim.get_pos_orn_vel()
        obs = self.get_obs()

        # this reward calculation is taken verbatim from halfcheetah-v2, save for the stability term
        reward_ctrl = -np.square(action).sum()
        reward_run = (pos_after[0] - pos_before[0]) / self.dt
        # technically we should divide the next line by (3*pi^2) but that's really hard to reach
        reward_stable = -np.square(orn_after).sum() / np.square(np.pi)
        reward = self.rcoeff_ctrl * reward_ctrl + self.rcoeff_run * reward_run + self.rcoeff_stable * reward_stable

        done = False
        self.episode_steps += 1
        if self.episode_steps == self.episode_steps_max:
            done = True

        return obs, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl, reward_stable=reward_stable)

    def render(self, mode="human"):
        # todo: if the mode == "human" then this should open and display a
        # window a la "cv2.imshow('xxx', img); cv2.waitKey(10)"
        img, _ = self.sim.take_photo()
        return img
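# A minimal usage sketch for FootOffsetEnv, assuming the module-level imports
# and constants (spaces, np, PupperSim2, CONTROL_FREQUENCY, FREQ_SIM,
# ASSET_DIR) are available as in the class above. Actions are normalized
# per-foot x/y/z offsets in [-1, 1]; sanitize_actions() denormalizes them into
# the allowed foot ranges and blends them half-half with the hardcoded gait
# before running inverse kinematics.
import numpy as np

env = FootOffsetEnv(debug=False, steps=120)
obs = env.reset()
done = False
while not done:
    offsets = np.random.uniform(-1, 1, size=12)  # random offsets around the gait
    obs, reward, done, info = env.step(offsets)
    print(info["reward_run"], info["reward_stable"])
env.close()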
import numpy as np
from tqdm import trange

import gym_pupper
from gym_pupper.common.Utilities import controller_to_sim, ik_to_sim
from gym_pupper.pupper.policy import HardPolicy
from gym_pupper.sim.simulator2 import PupperSim2

FREQ_SIM = 240
FREQ_CTRL = 60
substeps = int(FREQ_SIM / FREQ_CTRL)

sim = PupperSim2(with_arm=True, frequency=FREQ_SIM, debug=True, img_size=(512, 256))
sim.reset()
sim.change_color((0.588, 0.419, 1, 1))
# time.sleep(1)

policy = HardPolicy(fps=FREQ_CTRL, warmup=10)

arm_rest = np.array([0, 1, -0.8]) * np.pi / 2
sim.action_arm(arm_rest)

arm_angles = []
for i in trange(60 * 47):
    # action, _ = controller_to_sim(policy.act(velocity_horizontal=(-0.2, 0), normalized=True)) * np.pi
    # action, _ = controller_to_sim(policy.act(velocity_horizontal=(0.2, 0), yaw_rate=1.0, normalized=True)) * np.pi

    # idle_walk_fw = 0.008
    #
    # vel_body = (0, 0)
    # yaw = 0
    # vel_arm = (0, 0, 0)  # (+0.001, +0.002, 0)
    # if 0 <= i <= 60 * 5:
plt.tight_layout()
plt.title("sim direct joints")
plt.show()

reset_sim()

debug_params = []
for param in ["velocity fwd", "velocity left"]:
    debug_params.append(sim.p.addUserDebugParameter(param, -1, 1, 0))
debug_params.append(sim.p.addUserDebugParameter("reset", 1, 0, 1))
debug_params.append(sim.p.addUserDebugParameter("plot joints", 1, 0, 1))
# debug_params.append(sim.p.addUserDebugParameter("zero", 1, 0, 1))

policy = HardPolicy()

last_reset_val = -1
last_plot_val = -1
joints = []
counter = 0
while True:
    vel = [0, 0]
    for param_idx, param in enumerate(debug_params):
        val = sim.p.readUserDebugParameter(param)
        if param_idx < 2:
            vel[param_idx] = val
        if param_idx == 2 and val != last_reset_val:
            reset_sim()
            last_reset_val = val
        if param_idx == 3 and val != last_plot_val:
import time

import numpy as np

from gym_pupper.common.Utilities import controller_to_sim
from gym_pupper.pupper.policy import HardPolicy

FREQ_CTRL = 60
TEST_BODY = False
TEST_ARM = True

policy = HardPolicy(fps=FREQ_CTRL)

########### BODY

if TEST_BODY:
    iterations = 10_000
    times = 0
    for _ in range(iterations):
        start = time.time()
        _ = controller_to_sim(policy.act(velocity_horizontal=(0.2, 0), yaw_rate=1.0, normalized=True)) * np.pi
        diff = time.time() - start
        times += diff
    print(
        f"BODY: ran {iterations} iterations at {np.around(iterations / times, 1)}Hz, "
        f"so avg of {times / iterations}s per iteration"
    )
    # MBP 2018: ran 100000 iterations at 1749.8Hz, so avg of 0.000571490352153778s per iteration

########### ARM
import numpy as np
from scipy import fftpack
from tqdm import trange
import matplotlib.pyplot as plt

from gym_pupper.common.Utilities import controller_to_sim
from gym_pupper.pupper.policy import HardPolicy

policy = HardPolicy()
steps = 2000

joints = []
feet = []
for _ in trange(steps):
    joints_legs, _ = policy.act(velocity_horizontal=(0.2, 0), yaw_rate=0.0, normalized=False)
    action = controller_to_sim(joints_legs)
    joints.append(action)
    feet.append(policy.state.foot_locations.T)

feet = np.array(feet)
print(feet.shape)

x = np.arange(0, steps)
fig, axs = plt.subplots(2, 2, sharex=True, sharey=True)
for foot in range(4):
    im = axs[foot // 2][foot % 2].scatter(feet[:, foot, 0], feet[:, foot, 2], alpha=0.5, c=x, cmap="viridis")
    # axs[foot % 2][foot // 2].legend()
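# scipy.fftpack is imported above but unused in this excerpt. A sketch of the
# kind of analysis it enables, assuming the `joints` list collected in the
# loop above and a 60Hz control rate (the rate used elsewhere in this repo):
# estimate the dominant gait frequency from one joint trajectory.
joints = np.array(joints)
signal = joints[:, 1] - joints[:, 1].mean()  # front-right upper leg, zero-mean
spectrum = np.abs(fftpack.fft(signal))
freqs = fftpack.fftfreq(len(signal), d=1 / 60)
peak_idx = np.argmax(spectrum[1 : len(signal) // 2]) + 1  # skip the DC bin
print(f"dominant gait frequency: {freqs[peak_idx]:.2f}Hz")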
class WalkingEnv(gym.Env):
    def __init__(
        self,
        debug=False,
        steps=120,
        control_freq=CONTROL_FREQUENCY,
        relative_action=True,
        incremental_action=False,
        action_scaling=1.0,
        action_smoothing=1,
        random_rot=(0, 0, 0),
        reward_coefficients=(0.1, 1, 0),
        stop_on_flip=False,
        gait_factor=0.0,
    ):
        """
        Gym-compatible environment to teach the pupper how to walk

        :param bool debug: If True, shows PyBullet in GUI mode. Set to False when training.
        :param int steps: How long is the episode? Each step = one call to WalkingEnv.step()
        :param int control_freq: How many control steps there are in a second of PyBullet sim.
            PyBullet always runs at 240Hz but that's not optimal for RL control.
        :param bool relative_action: If set to True, then all actions are added to the resting position.
            This gives the robot a more stable starting position.
        """
        super().__init__()

        # observation space:
        # - 12 leg joints in the order
        #   - front right hip
        #   - front right upper leg
        #   - front right lower leg
        #   - front left hip/upper/lower leg
        #   - back right hip/upper/lower leg
        #   - back left hip/upper/lower leg
        # - 3 body orientation in euler angles
        # - 2 linear velocity (only along the plane, we don't care about z velocity)
        self.observation_space = spaces.Box(low=-1, high=1, shape=(12 + 3 + 2,), dtype=np.float32)

        # action = 12 joint angles in the same order as above (fr/fl/br/bl and each with hip/upper/lower)
        self.action_space = spaces.Box(low=-1, high=1, shape=(12,), dtype=np.float32)

        # turning off start_standing because that's done in self.reset()
        kwargs = {}
        # if relative_action:  # if this is turned on, the robot is underpowered for the hardcoded gait
        #     kwargs = {"gain_pos": 1 / 16, "gain_vel": 1 / 8, "max_torque": 1 / 2}
        self.sim = PupperSim2(debug=debug, start_standing=False, **kwargs)

        self.episode_steps = 0
        self.episode_steps_max = steps
        self.control_freq = control_freq
        self.dt = 1 / self.control_freq
        self.incremental_angle = np.deg2rad(MAX_ANGLE_PER_SEC) / self.control_freq
        self.sim_steps = int(round(FREQ_SIM / control_freq))
        self.relative_action = relative_action
        self.incremental_action = incremental_action
        self.action_scaling = action_scaling
        self.action_smoothing = deque(maxlen=action_smoothing)
        self.random_rot = random_rot
        self.stop_on_flip = stop_on_flip
        self.current_action = np.array([0] * 12)
        self.gait = None
        assert 0 <= gait_factor <= 1
        self.gait_factor = gait_factor

        self.joints_hard_limit_lower = (-np.pi + 0.001) * np.ones(12)
        self.joints_hard_limit_uppper = (np.pi - 0.001) * np.ones(12)

        # new reward coefficients
        self.rcoeff_ctrl, self.rcoeff_run, self.rcoeff_stable = reward_coefficients

    def reset(self):
        self.episode_steps = 0

        # both when the action formulation is incremental and when it's relative, we need to start standing
        self.sim.reset(rest=True)  # also stand up the robot
        # for _ in range(10):
        #     self.sim.step()

        # this is used when self.incremental_action == True
        self.current_action = self.sim.get_rest_pos()

        self.gait = HardPolicy()

        return self.get_obs()

    def seed(self, seed=None):
        np.random.seed(seed)
        return super().seed(seed)

    def close(self):
        self.sim.p.disconnect()
        super().close()

    def sanitize_actions(self, actions):
        assert len(actions) == 12

        actions = np.array(actions).astype(np.float32)
        if not self.incremental_action:
            scaled = actions * np.pi * self.action_scaling  # because 1/-1 corresponds to pi/-pi radians rotation
            if self.relative_action:
                scaled += self.sim.get_rest_pos()
                # this enforces an action range of -1/1, except if it's a relative action -
                # then the action space is asymmetric
        else:
            scaled = actions * self.incremental_angle + self.current_action

        clipped = np.clip(scaled, self.joints_hard_limit_lower, self.joints_hard_limit_uppper)
        self.current_action = np.copy(clipped)
        return clipped

    def get_obs(self):
        pos, orn, vel = self.sim.get_pos_orn_vel()

        joint_states = np.array(self.sim.get_joint_states()) / np.pi  # to normalize to [-1, 1]
        obs = list(joint_states) + list(orn) + list(vel)[:2]
        return obs

    def step(self, action):
        action_clean = self.sanitize_actions(action)
        pos_before, _, _ = self.sim.get_pos_orn_vel()

        # The action command only sets the goals of the motors. It doesn't actually step the simulation forward
        # in time. Instead of feeding the simulator the action directly, we take the mean of the last N actions,
        # where N comes from the action_smoothing hyper-parameter.
        self.action_smoothing.append(action_clean)
        action_agent = np.mean(self.action_smoothing, axis=0)

        action_gait = controller_to_sim(self.gait.act(velocity_horizontal=(0.2, 0), normalized=False))
        action = self.gait_factor * action_gait + (1 - self.gait_factor) * action_agent

        # let's clip again just to be safe and within the boundaries of the expert
        action = np.clip(
            action,
            np.max((self.joints_hard_limit_lower, action_gait + 0.2 * self.joints_hard_limit_lower), axis=0),
            np.min((self.joints_hard_limit_uppper, action_gait + 0.2 * self.joints_hard_limit_uppper), axis=0),
        )

        self.sim.action(action)
        for _ in range(self.sim_steps):
            self.sim.step()

        pos_after, orn_after, _ = self.sim.get_pos_orn_vel()
        obs = self.get_obs()

        # this reward calculation is taken verbatim from halfcheetah-v2, save for the stability term
        reward_ctrl = -np.square(action).sum()
        reward_run = (pos_after[0] - pos_before[0]) / self.dt
        # technically we should divide the next line by (3*pi^2) but that's really hard to reach
        reward_stable = -np.square(orn_after).sum() / np.square(np.pi)
        reward = self.rcoeff_ctrl * reward_ctrl + self.rcoeff_run * reward_run + self.rcoeff_stable * reward_stable

        done = False
        self.episode_steps += 1
        if self.episode_steps == self.episode_steps_max:
            done = True

        if self.stop_on_flip:
            flipped = self.sim.check_collision()
            if flipped:
                done = True
                reward -= 1000

        return obs, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl, reward_stable=reward_stable)

    def render(self, mode="human"):
        # todo: if the mode == "human" then this should open and display a
        # window a la "cv2.imshow('xxx', img); cv2.waitKey(10)"
        img, _ = self.sim.take_photo()
        return img
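# A minimal usage sketch for WalkingEnv, assuming the same module-level
# imports and constants as the class above. gait_factor blends the hardcoded
# expert with the agent inside step(): 1.0 executes the pure gait, 0.0 gives
# the agent full control, and values in between mix the two joint targets
# linearly.
import numpy as np

env = WalkingEnv(debug=False, steps=120, relative_action=True, gait_factor=0.5)
obs = env.reset()
total_reward = 0.0
done = False
while not done:
    action = np.zeros(12)  # with relative_action=True, zeros hold the rest pose
    obs, reward, done, info = env.step(action)
    total_reward += reward
print("episode return:", total_reward)
env.close()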