    def __init__(self, action_noise, num_retries, task):
        self.noise = action_noise
        self.retries = num_retries
        self.task = task
        if task == 'pusher':
            self.robot = PusherRobotNoisy()
        else:
            self.robot = SingleRobot(debug=False)
        self.action_len = len(self.robot.motor_ids)
        self._nn_set = learners.NNSet()
        np.random.seed(seed=225)
        random.seed(225)
Example 3
    def __init__(self, action_noise, num_retries, task):
        self.noise = action_noise
        self.retries = num_retries
        self.task = task
        if task == 'pusher':
            self.env = gym.make('Nas-Pusher-3dof-Vanilla-v1')
            self.action_len = self.env.action_space.shape[0]
        else:
            self.robot = SingleRobot(debug=False)
            self.action_len = len(self.robot.motor_ids)
        self._nn_set = learners.NNSet()
        np.random.seed(seed=225)
        random.seed(225)
class ErgoGripperEnv(gym.Env):
    def __init__(self, headless=False, cube_spawn="linear", touchy=False):
        assert cube_spawn in ["linear", "square"]
        self.cube_spawn = cube_spawn
        self.touchy = touchy

        self.goals_done = 0
        self.is_initialized = False

        self.robot = SingleRobot(robot_model="ergojr-gripper",
                                 debug=not headless,
                                 gripper=True,
                                 reset=False,
                                 frequency=60)

        # Cube the robot must reach, created in reset method
        self.cube = None

        self.cam = Cam(pos=[0.25, .25, 0.1],
                       look_at=[0.00, .10, 0.1],
                       width=64,
                       height=64,
                       fov=60)

        self.plot = None

        # Episode count, used for resetting the PyBullet sim every so often
        self.episodes = 0

        self.metadata = {'render.modes': ['human', 'rgb_array']}

        # observation = (img, 6 joints + 6 velocities + 3 cube position)
        self.observation_space = spaces.Tuple([
            spaces.Box(low=0, high=255, shape=(64, 64, 3), dtype=np.uint8),
            spaces.Box(low=-1,
                       high=1,
                       shape=(3 + 3 + 2 + 2, ),
                       dtype=np.float32)
        ])

        # 6 joint angles in [-1, 1]
        self.action_space = spaces.Box(low=-1,
                                       high=1,
                                       shape=(6, ),
                                       dtype=np.float32)

        super().__init__()

    def seed(self, seed=None):
        return [np.random.seed(seed)]

    def step(self, action):
        self.robot.act2(action, max_vel=.8, max_force=.8)

        for _ in range(FRAME_SKIP):
            self.robot.step()

        reward, done, dist = self._getReward()

        obs = self._get_obs()
        return obs, reward, done, {"distance": dist}

    def get_collision(self, bodyB):
        outA = p.getContactPoints(bodyA=self.robot.id,
                                  bodyB=bodyB,
                                  linkIndexA=14,
                                  linkIndexB=0)
        outB = p.getContactPoints(bodyA=self.robot.id,
                                  bodyB=bodyB,
                                  linkIndexA=13,
                                  linkIndexB=0)
        return len(outA) + len(outB)

    def _getReward(self):
        done = False

        reward = self.cube.dbo.query()
        distance = reward.copy()

        collision = self.get_collision(self.cube.cube)

        reward *= -1  # reward is the negated distance (closer = higher reward)
        if distance < GOAL_REACHED_DISTANCE or (
                self.touchy
                and collision > 0):  # this is a bit arbitrary, but works well
            done = True
            reward = 1

        return reward, done, distance

    def reset(self, forced=False):
        self.episodes += 1
        if self.episodes >= RESTART_EVERY_N_EPISODES or forced or not self.is_initialized:
            if self.cube is not None:
                self.cube.cleanup()

            self.robot.hard_reset()  # this always has to go first

            self.cube = Cube(self.robot.id, self.cube_spawn)
            self.cube.reset()
            self.episodes = 0
            self.is_initialized = True
        else:
            self.cube.reset()

        qpos = self.robot.rest_pos.copy()
        qpos += np.random.uniform(low=-0.2, high=0.2, size=6)

        qposvel = np.zeros(12, dtype=np.float32)
        qposvel[:6] = qpos

        for _ in range(20):
            # to stabilize cube at lower framerates
            self.robot.step()

        self.robot.set(qposvel)
        self.robot.act2(qposvel[:6])
        self.robot.step()

        return self._get_obs()

    def _get_obs(self):
        obs = np.hstack([self.robot.observe(), self.cube.normalize_cube()])
        img = self.cam.snap()
        img = (img * 255).astype(np.uint8)
        return img, obs

    def render(self, mode='human', close=False):
        if mode == "human":
            img = self.cam.snap()
            if self.plot is None:
                plt.ion()
                self.plot_container = plt.imshow(img,
                                                 interpolation='none',
                                                 animated=True,
                                                 label="live feed")
                self.plot = plt.gca()

            self.plot_container.set_data(img)
            self.plot.plot([0])
            plt.pause(0.001)
        else:
            return self._get_obs()[0]

    def close(self):
        self.robot.close()

    def _get_state(self):
        return self.robot.observe()
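# --- Not from the original source: a minimal usage sketch for ErgoGripperEnv ---
# Assumes the class above (and its module constants such as FRAME_SKIP and
# GOAL_REACHED_DISTANCE) is importable; the rollout length is arbitrary.

env = ErgoGripperEnv(headless=True, cube_spawn="square", touchy=True)
img, state = env.reset()  # observation is an (image, state vector) pair

for step in range(100):
    action = env.action_space.sample()  # random 6-dim action in [-1, 1]
    (img, state), reward, done, info = env.step(action)
    if done:  # cube reached (or touched, when touchy=True)
        print("solved, distance to cube:", info["distance"])
        img, state = env.reset()

env.close()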
import numpy as np
import matplotlib.pyplot as plt
from gym_ergojr.sim.single_robot import SingleRobot
from arguments import get_args
import os

robot = SingleRobot(debug=False)  # 6DOF Reacher Robot
args = get_args()

file_path = os.getcwd() + '/data/freq{}/{}'.format(args.freq, args.approach)

if not os.path.isdir(file_path):
    os.makedirs(file_path)

np.random.seed(seed=225)
total_steps = 10000 * 100
rest_interval = 10 * 100
freq = args.freq
count = 0
steps_until_resample = 100 / freq

sim_trajectories = np.zeros((total_steps, 12))
actions = np.zeros((total_steps, 6))
bad_actions = np.zeros((total_steps))
robot.reset()
robot.step()
end_pos = []

for epi in range(total_steps):

    if epi % rest_interval == 0:  # Take rest after every 10 * 100 steps
        # the original excerpt is cut off here; a minimal assumed "rest":
        robot.reset()
        robot.step()
class ErgoReacherEnv(gym.Env):
    def __init__(self,
                 headless=False,
                 simple=False,
                 backlash=False,
                 max_force=1,
                 max_vel=18,
                 goal_halfsphere=False,
                 multi_goal=False,
                 goals=3,
                 gripper=False):
        self.simple = simple
        self.backlash = backlash
        self.max_force = max_force
        self.max_vel = max_vel
        self.multigoal = multi_goal
        self.n_goals = goals
        self.gripper = gripper

        self.goals_done = 0
        self.is_initialized = False
        self.robot = SingleRobot(debug=not headless, backlash=backlash)
        self.ball = Ball()
        self.rhis = RandomPointInHalfSphere(0.0,
                                            0.0369,
                                            0.0437,
                                            radius=RADIUS,
                                            height=0.2610,
                                            min_dist=0.1,
                                            halfsphere=goal_halfsphere)
        self.goal = None
        self.dist = DistanceBetweenObjects(bodyA=self.robot.id,
                                           bodyB=self.ball.id,
                                           linkA=13,
                                           linkB=1)
        self.episodes = 0  # used for resetting the sim every so often
        self.restart_every_n_episodes = 1000

        self.metadata = {'render.modes': ['human']}

        if not simple and not gripper:  # default
            # observation = 6 joints + 6 velocities + 3 coordinates for target
            self.observation_space = spaces.Box(low=-1,
                                                high=1,
                                                shape=(6 + 6 + 3, ),
                                                dtype=np.float32)  #
            # action = 6 joint angles
            self.action_space = spaces.Box(low=-1,
                                           high=1,
                                           shape=(6, ),
                                           dtype=np.float32)  #

        elif not gripper:  # simple
            # observation = 4 joints + 4 velocities + 2 coordinates for target
            self.observation_space = spaces.Box(low=-1,
                                                high=1,
                                                shape=(4 + 4 + 2, ),
                                                dtype=np.float32)  #
            # action = 4 joint angles
            self.action_space = spaces.Box(low=-1,
                                           high=1,
                                           shape=(4, ),
                                           dtype=np.float32)  #
        else:  # gripper
            # observation = 3 joints + 3 velocities + 2 coordinates for target
            self.observation_space = spaces.Box(low=-1,
                                                high=1,
                                                shape=(3 + 3 + 2, ),
                                                dtype=np.float32)  #
            # action = 3 joint angles, [-,1,2,-,4,-]
            self.action_space = spaces.Box(low=-1,
                                           high=1,
                                           shape=(3, ),
                                           dtype=np.float32)  #

        super().__init__()

    def seed(self, seed=None):
        return [np.random.seed(seed)]

    def step(self, action):
        if self.simple or self.gripper:
            action_ = np.zeros(6, np.float32)
            if self.simple:
                action_[[1, 2, 4, 5]] = action
            if self.gripper:
                action_[[1, 2, 4]] = action
            action = action_

        self.robot.act2(action, max_force=self.max_force, max_vel=self.max_vel)
        self.robot.step()

        reward, done, dist = self._getReward()

        obs = self._get_obs()
        return obs, reward, done, {"distance": dist}

    def _getReward(self):
        done = False

        reward = self.dist.query()
        distance = reward.copy()
        if not self.multigoal:  # this is the normal mode
            reward *= -1  # reward is the negated distance (closer = higher reward)
            if reward > GOAL_REACHED_DISTANCE:  # this is a bit arbitrary, but works well
                self.goals_done += 1
                done = True
                reward = 1
        else:
            if -reward > GOAL_REACHED_DISTANCE:
                self.goals_done += 1
                if self.goals_done == self.n_goals:
                    done = True
                else:
                    robot_state = self._get_obs()[:8]
                    self.move_ball()
                    self._set_state(
                        robot_state)  # move robot back after the ball has moved
                    self.robot.step()
                    reward = self.dist.query()

            reward = (self.goals_done * DIA +
                      (DIA - reward)) / (self.n_goals * DIA)

            if done:
                reward = 1

        if self.gripper:
            reward *= 10
            if self.goals_done == RESET_EVERY:
                self.goals_done = 0
                self.reset(True)
            done = False

            # normalize - [-1,1] range:
            # reward = reward * 2 - 1

        return reward, done, distance

    def _setDist(self):
        self.dist.bodyA = self.robot.id
        self.dist.bodyB = self.ball.id

    def move_ball(self):
        if self.simple or self.gripper:
            self.goal = self.rhis.sampleSimplePoint()
        else:
            self.goal = self.rhis.samplePoint()

        self.dist.goal = self.goal
        self.ball.changePos(self.goal, 4)

        for _ in range(20):
            self.robot.step()  # we need this to move the ball

    def reset(self, forced=False):
        self.goals_done = 0

        self.episodes += 1
        if self.episodes >= self.restart_every_n_episodes:
            self.robot.hard_reset()  # this always has to go first
            self.ball.hard_reset()
            self._setDist()
            self.episodes = 0

        if self.is_initialized:
            robot_state = self._get_state()

        self.move_ball()

        if self.gripper and self.is_initialized:
            self._set_state(
                robot_state[:6])  # move robot back after the ball has moved
            self.robot.step()

        if forced or not self.gripper:  # if it's the gripper
            qpos = np.random.uniform(low=-0.2, high=0.2, size=6)

            if self.simple:
                qpos[[0, 3]] = 0

            self.robot.reset()
            self.robot.set(np.hstack((qpos, [0] * 6)))
            self.robot.act2(np.hstack((qpos)))
            self.robot.step()

        self.is_initialized = True

        return self._get_obs()

    def _get_obs(self):
        obs = np.hstack([self.robot.observe(), self.rhis.normalize(self.goal)])
        if self.simple:
            obs = obs[[1, 2, 4, 5, 7, 8, 10, 11, 13, 14]]
        if self.gripper:
            obs = obs[[1, 2, 4, 7, 8, 10, 13, 14]]
        return obs

    def render(self, mode='human', close=False):
        pass

    def close(self):
        self.robot.close()

    def _get_state(self):
        return self.robot.observe()

    def _set_state(self, posvel):
        if self.simple or self.gripper:
            new_state = np.zeros((12), dtype=np.float32)
            if self.simple:
                new_state[[1, 2, 4, 5, 7, 8, 10, 11]] = posvel
            if self.gripper:
                new_state[[1, 2, 4, 7, 8, 10]] = posvel
        else:
            new_state = np.array(posvel)
        self.robot.set(new_state)
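# --- Not from the original source: a short usage sketch for ErgoReacherEnv ---
# Assumes the class above is importable; simple=True activates only joints
# 1, 2, 4, 5, so actions are 4-dimensional and observations 10-dimensional.

env = ErgoReacherEnv(headless=True, simple=True)
obs = env.reset()  # 4 joint angles + 4 velocities + 2 goal coordinates

for step in range(300):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:  # tip got within GOAL_REACHED_DISTANCE of the goal
        obs = env.reset()

env.close()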


def data_to_var_nosim(real_t1, action):
    return Variable(double_unsqueeze(
        torch.cat([
            torch.from_numpy(real_t1).float(),
            torch.from_numpy(action).float()
        ],
                  dim=0)),
                    volatile=True)


#### SIM
# (this excerpt assumes `ds`, `epi`, `joints_sim` and `joints_real` were set up
#  by dataset-loading code like the DatasetProduction snippet further below)

robot = SingleRobot(debug=False)
robot.set(ds.current_real[epi, 0])
robot.act2(ds.current_real[epi, 0, :6])
robot.step()
for frame in range(299):
    robot.act2(ds.action[epi, frame])
    robot.step()
    joints_sim[frame, :] = robot.observe()[:6]
    # time.sleep(0.1)
robot.close()

### REAL_SIM

for frame in range(299):
    joints_real[frame, :] = ds.current_real[epi, frame, :6]
Example 10
import time
from pypot.robot import from_remote
import numpy as np
from gym_ergojr.sim.single_robot import SingleRobot

robot_sim = SingleRobot(debug=True)
robot_real = from_remote('flogo2.local', 4242)

poses = [
    [45, 0, 0, 0, 0, 0],
    [0, 45, 0, 0, 0, 0],
    [0, 0, 45, 0, 0, 0],
    [0, 0, 0, 45, 0, 0],
    [0, 0, 0, 0, 45, 0],
    [0, 0, 0, 0, 0, 45],
]

for pose in poses:
    pose_sim = np.deg2rad(pose + [0] * 6) / (np.pi / 2)
    robot_sim.set(pose_sim)
    robot_sim.act2(pose_sim[:6])
    # robot_sim.step()
    for i in range(100):
        robot_sim.step()

    print(robot_sim.observe().round(1))
    for i, m in enumerate(robot_real.motors):
        m.goal_position = pose[i]

    time.sleep(2)
import time

from gym_ergojr.sim.single_robot import SingleRobot

robot = SingleRobot(debug=True)
robot.reset()
robot.step()

for i in range(100):
    # robot.act2([0,-1,0,0,-1,0]) # min y: -0.1436
    # robot.act2([0,1,-1,0,0,0]) # max y: 0.22358
    # robot.act2([0,-.2,0,0,-.8,0]) # max z: 0.25002
    robot.act2([0, -.2, 1, 0, 0, 0.5])  # min z: 0.016000
    robot.step()
    print(robot.get_tip()[0]
          [1:])  # cut off x axis, only return y (new x) and z (new y)
    time.sleep(0.1)
Example 12
class GoalBabbling(object):
    def __init__(self, action_noise, num_retries, task):
        self.noise = action_noise
        self.retries = num_retries
        self.task = task
        if task == 'pusher':
            self.env = gym.make('Nas-Pusher-3dof-Vanilla-v1')
            self.action_len = self.env.action_space.shape[0]
        else:
            self.robot = SingleRobot(debug=False)
            self.action_len = len(self.robot.motor_ids)
        self._nn_set = learners.NNSet()
        np.random.seed(seed=225)
        random.seed(225)

    def nearest_neighbor(self, goal, history):
        """Return the motor command of the nearest neighbor of the goal"""
        if len(history) < len(self._nn_set):  # HACK
            self._nn_set = learners.NNSet()
        for m_command, effect in history[len(self._nn_set):]:
            self._nn_set.add(m_command, y=effect)
        idx = self._nn_set.nn_y(goal)[1][0]
        return history[idx][0]

    def add_noise(self, nn_command):
        action = np.asarray(nn_command)
        action_noise = np.random.uniform(-self.noise, self.noise,
                                         self.action_len)
        new_command = action + action_noise
        if self.task == 'reacher':
            new_command[0], new_command[3] = 0, 0
            new_command = np.clip(new_command, -1, 1)
        new_command = np.clip(new_command, -3, 3)
        return new_command

    def sample_action(self):
        if self.task == 'reacher':
            # the reacher task has no self.env, so sample directly and lock joints 0 and 3
            action = np.random.uniform(-1, 1, self.action_len)
            action[0], action[3] = 0, 0
        else:
            action = self.env.action_space.sample()
        return action

    def action_retries(self, goal, history):
        history_local = []
        goal = np.asarray([goal])
        action = self.nearest_neighbor(goal, history)
        for _ in range(self.retries):
            action_noise = self.add_noise(action)
            _, end_position, obs = self.perform_action(action_noise)
            history_local.append((action_noise, end_position))
        action_new = self.nearest_neighbor(goal, history_local)
        return action_new

    def perform_action(self, action):
        if self.task == 'reacher':
            self.robot.act(action)
            self.robot.step()
            end_pos = self.robot.get_tip()
            obs = self.robot.observe()
        else:
            obs, _, _, _ = self.env.step(action)
            end_pos = list(obs[6:8])
        return action, end_pos, obs

    def reset_robot(self):
        if self.task == 'reacher':
            self.robot.reset()
            self.robot.rest()
            self.robot.step()
            observation = self.robot.observe()  # so both branches return an observation
        else:
            observation = self.env.reset()
        return observation

    @staticmethod
    def dist(a, b):
        return np.linalg.norm(a - b)  # We don't need this method anymore
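# --- Not from the original source: a hedged sketch of one goal-babbling cycle ---
# Uses the GoalBabbling class above with the pusher task (the
# 'Nas-Pusher-3dof-Vanilla-v1' env it already references); the goal range and
# iteration counts are illustrative assumptions.

import numpy as np

gb = GoalBabbling(action_noise=0.1, num_retries=5, task='pusher')
gb.reset_robot()

history = []  # (motor command, end position) pairs
for _ in range(50):  # bootstrap: random babbling to seed the history
    action = gb.sample_action()
    action, end_pos, obs = gb.perform_action(action)
    history.append((action, end_pos))

goal = np.random.uniform(-0.1, 0.1, size=2)  # illustrative 2-D pusher goal
best_action = gb.action_retries(goal, history)  # refine the nearest known command
_, end_pos, _ = gb.perform_action(best_action)
print("distance to goal:", GoalBabbling.dist(np.asarray(end_pos), goal))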
class ErgoReacherHeavyEnv(gym.Env):

  def __init__(self,
               headless=False,
               simple=False,
               max_force=1000,
               max_vel=100,
               goal_halfsphere=False,
               backlash=.1,
               double_goal=False):
    self.simple = simple
    self.max_force = max_force
    self.max_vel = max_vel
    self.double_goal = double_goal

    self.robot = SingleRobot(
        debug=not headless, heavy=True, new_backlash=backlash, silent=True)
    self.ball = Ball(1)
    self.rhis = RandomPointInHalfSphere(
        0.0,
        3.69,
        4.37,
        radius=RADIUS,
        height=26.10,
        min_dist=10.,
        halfsphere=goal_halfsphere)
    self.goal = None
    self.goals_done = 0
    self.goal_dirty = False
    self.dist = DistanceBetweenObjects(
        bodyA=self.robot.id, bodyB=self.ball.id, linkA=19, linkB=1)
    self.episodes = 0  # used for resetting the sim every so often
    self.restart_every_n_episodes = 1000

    self.force_urdf_reload = False

    self.metadata = {'render.modes': ['human']}

    if not simple:
      # observation = 6 joints + 6 velocities + 3 coordinates for target
      self.observation_space = spaces.Box(
          low=-1, high=1, shape=(6 + 6 + 3,), dtype=np.float32)  #
      # action = 6 joint angles
      self.action_space = spaces.Box(
          low=-1, high=1, shape=(6,), dtype=np.float32)  #

    else:
      # observation = 4 joints + 4 velocities + 2 coordinates for target
      self.observation_space = spaces.Box(
          low=-1, high=1, shape=(4 + 4 + 2,), dtype=np.float32)  #
      # action = 4 joint angles
      self.action_space = spaces.Box(
          low=-1, high=1, shape=(4,), dtype=np.float32)  #

    super().__init__()

  def seed(self, seed=None):
    return [np.random.seed(seed)]

  def step(self, action):
    if self.simple:
      action_ = np.zeros(6, np.float32)
      action_[[1, 2, 4, 5]] = action
      action = action_

    self.robot.act2(action, max_force=self.max_force, max_vel=self.max_vel)
    self.robot.step()
    self.robot.step()
    self.robot.step()

    reward, done = self._getReward()

    obs = self._get_obs()
    return obs, reward, done, {}

  def _getReward(self):
    done = False

    reward = self.dist.query()
    reward *= -1  # reward is the negated distance (closer = higher reward)

    if not self.double_goal:  # this is the normal mode
      if reward > -1.6:  # this is a bit arbitrary, but works well
        done = True
        reward = 1
    else:
      if reward > -1.6:
        self.goals_done += 1
        if self.goals_done == MAX_GOALS:
          done = True
        else:
          self.move_ball()
          self.goal_dirty = True

      max_multiplier = (MAX_GOALS - self.goals_done - 1)
      if self.goal_dirty:
        max_multiplier += 1
        self.goal_dirty = False

      # unnormalized:
      reward = reward - (RADIUS * 2 * max_multiplier)

      # # normalize - [0,1] range:
      reward = (reward + (RADIUS * 2 * (MAX_GOALS))) / (
          RADIUS * 2 * (MAX_GOALS))
      if done:
        reward = 1

      # normalize - [-1,1] range:
      reward = reward * 2 - 1

    return reward, done

  def _setDist(self):
    self.dist.bodyA = self.robot.id
    self.dist.bodyB = self.ball.id

  def update_backlash(self, new_val):
    self.robot.new_backlash = new_val
    self.force_urdf_reload = True
    # and now on the next self.reset() the new modified URDF will be loaded

  def move_ball(self):
    if self.simple:
      self.goal = self.rhis.sampleSimplePoint()
    else:
      self.goal = self.rhis.samplePoint()

    self.dist.goal = self.goal
    self.ball.changePos(self.goal, 4)

    for _ in range(20):
      self.robot.step()  # we need this to move the ball

  def reset(self):
    self.goals_done = 0
    self.goal_dirty = False

    self.episodes += 1
    if self.force_urdf_reload or self.episodes >= self.restart_every_n_episodes:
      self.robot.hard_reset()  # this always has to go first
      self.ball.hard_reset()
      self._setDist()
      self.episodes = 0
      self.force_urdf_reload = False

    self.move_ball()

    qpos = np.random.uniform(low=-0.2, high=0.2, size=6)

    if self.simple:
      qpos[[0, 3]] = 0

    self.robot.reset()
    self.robot.set(np.hstack((qpos, [0] * 6)))
    self.robot.act2(np.hstack((qpos)))
    self.robot.step()

    return self._get_obs()

  def _get_obs(self):
    obs = np.hstack([self.robot.observe(), self.rhis.normalize(self.goal)])
    if self.simple:
      obs = obs[[1, 2, 4, 5, 7, 8, 10, 11, 13, 14]]
    return obs

  def render(self, mode='human', close=False):
    pass

  def close(self):
    self.robot.close()

  def _get_state(self):
    return self.robot.observe()

  def _set_state(self, posvel):
    if self.simple:
      new_state = np.zeros((12), dtype=np.float32)
      new_state[[1, 2, 4, 5, 7, 8, 10, 11]] = posvel
    else:
      new_state = np.array(posvel)
    self.robot.set(new_state)
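# --- Not from the original source: a hedged sketch of randomizing backlash ---
# update_backlash() above only flags the URDF for reload; the new value takes
# effect on the next reset(). The backlash range here is an arbitrary example.

import numpy as np

env = ErgoReacherHeavyEnv(headless=True, simple=True, double_goal=True)

for episode in range(5):
    env.update_backlash(np.random.uniform(0.05, 0.2))  # applied at the next reset
    obs = env.reset()  # hard reset / URDF reload because force_urdf_reload is set
    for _ in range(300):
        obs, reward, done, _ = env.step(env.action_space.sample())
        if done:
            break

env.close()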
import time

from gym_ergojr.sim.single_robot import SingleRobot

robot = SingleRobot(debug=True)

actions = [[-1, 0, 0, -1, 0, -1], [1, 0, 0, 1, 0, .2], [0, 0, 0, 0, 0, 0]]
for action in actions:
    robot.act2(action)  # don't need to call this at every step
    for i in range(100):
        robot.step()
        print(robot.observe().round(2))
        time.sleep(.01)

    time.sleep(1)
    print("switch")
Example 15
import numpy as np
from gym_ergojr.sim.single_robot import SingleRobot
from s2rr.movements.dataset import DatasetProduction
from itertools import cycle

cycol = cycle('bgrcmk')

ds = DatasetProduction()
ds.load("~/data/sim2real/data-realigned-v3-{}.npz".format("train"))

epi = np.random.randint(0, len(ds.current_real))

joints_sim = np.zeros((299, 6), np.float32)
joints_real_sim = np.zeros((299, 6), np.float32)
joints_real = np.zeros((299, 6), np.float32)

robot_real = SingleRobot(debug=True)
for frame in range(299):
    robot_real.set(ds.current_real[epi, frame])
    # robot_real.act2(ds.current_real[epi, frame, :6])
    robot_real.step()
    joints_real_sim[frame, :] = robot_real.observe()[:6]
    # time.sleep(0.1)
robot_real.close()

robot_sim = SingleRobot(debug=True)
robot_sim.set(ds.current_real[epi, 0])
robot_sim.act2(ds.current_real[epi, 0, :6])
robot_sim.step()
for frame in range(299):
    robot_sim.act2(ds.action[epi, frame])
    robot_sim.step()
from tkinter import *
from gym_ergojr.sim.single_robot import SingleRobot

master = Tk()

robot = SingleRobot(debug=True)


def getActions():
    return [m.get() for m in motors]


def stepN(N):
    action = getActions()
    print("action", action)
    robot.act2(action)
    for i in range(N):
        robot.step()
    print("tip", robot.get_tip())


def step1():
    stepN(1)


def step10():
    stepN(10)


def step100():
    stepN(100)
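# --- Not from the original source: assumed Tk wiring for the excerpt above ---
# The excerpt never shows how `motors` or the step buttons are created; this is
# a hypothetical completion (slider ranges and layout are guesses).

motors = []
for i in range(6):
    s = Scale(master, from_=-1.0, to=1.0, resolution=0.01,
              orient=HORIZONTAL, length=200, label="motor {}".format(i))
    s.pack()
    motors.append(s)

Button(master, text="step 1", command=step1).pack()
Button(master, text="step 10", command=step10).pack()
Button(master, text="step 100", command=step100).pack()

master.mainloop()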
import os

from gym_ergojr.sim.single_robot import SingleRobot
import time
import numpy as np
from tqdm import tqdm

for run in ["train", "test"]:
    robot = SingleRobot(debug=True, robot_model="ergojr-penholder")
    ds = np.load(
        os.path.expanduser(
            "~/data/sim2real/data-realigned-{}.npz".format(run)))

    ds_curr_real = ds["ds_curr_real"]
    ds_next_real = ds["ds_next_real"]
    ds_action = ds["ds_action"]
    ds_epi = ds["ds_epi"]
    ds_next_sim = []

    print(ds_curr_real.shape)

    for i in tqdm(range(len(ds_curr_real))):
        robot.set(ds_curr_real[i])
        # print(robot.observe().round(2))
        robot.act2(ds_action[i])

        robot.step()

        obs = robot.observe()
        ds_next_sim.append(obs)


def save_model(state):
    torch.save({"state_dict": state}, MODEL_PATH)


loss_function = torch.nn.MSELoss()
exp = Experiment("[sim2real] lstm-realv6")
# exp.param("exp", EXPERIMENT)
# exp.param("layers", LSTM_LAYERS)
# exp.param("nodes", HIDDEN_NODES)
optimizer = torch.optim.Adam(net.parameters())

robot = SingleRobot(debug=False)

for epoch in range(EPOCHS):
    for epi in range(len(ds.current_real)):

        net.zero_hidden()  # !important
        net.hidden[0].detach_()  # !important
        net.hidden[1].detach_()  # !important
        net.zero_grad()
        optimizer.zero_grad()

        robot.set(ds.current_real[epi, 0])
        robot.act2(ds.current_real[epi, 0, :6])
        robot.step()

        losses = Variable(torch.zeros(1))
Example 19
import time
import numpy as np
from gym_ergojr.sim.single_robot import SingleRobot

robot = SingleRobot(debug=True)

for i in range(20):
    robot.set(np.random.normal(0, 1, 12))
    print(robot.observe().round(2))
    # robot.step()
    time.sleep(.1)
    robot.act2(np.random.normal(0, 1, 6))
    robot.step()
    print(robot.observe().round(2))
    time.sleep(.5)

    print("---")

time.sleep(1)
print("switch")