def __init__(self, action_noise, num_retries, task):
    self.noise = action_noise
    self.retries = num_retries
    self.task = task
    if task == 'pusher':
        self.robot = PusherRobotNoisy()
    else:
        self.robot = SingleRobot(debug=False)
    self.action_len = len(self.robot.motor_ids)
    self._nn_set = learners.NNSet()
    np.random.seed(seed=225)
    random.seed(225)
class ErgoGripperEnv(gym.Env):

    def __init__(self, headless=False, cube_spawn="linear", touchy=False):
        assert cube_spawn in ["linear", "square"]
        self.cube_spawn = cube_spawn
        self.touchy = touchy

        self.goals_done = 0
        self.is_initialized = False
        self.robot = SingleRobot(
            robot_model="ergojr-gripper",
            debug=not headless,
            gripper=True,
            reset=False,
            frequency=60)

        # Cube the robot must reach, created in reset method
        self.cube = None
        self.cam = Cam(
            pos=[0.25, .25, 0.1],
            look_at=[0.00, .10, 0.1],
            width=64,
            height=64,
            fov=60)
        self.plot = None

        # Episode count, used for resetting the PyBullet sim every so often
        self.episodes = 0

        self.metadata = {'render.modes': ['human', 'rgb_array']}

        # observation = (img, 6 joints + 6 velocities + 3 cube position)
        self.observation_space = spaces.Tuple([
            spaces.Box(low=0, high=255, shape=(64, 64, 3), dtype=np.uint8),
            spaces.Box(low=-1, high=1, shape=(3 + 3 + 2 + 2,), dtype=np.float32)
        ])

        # 6 joint angles in [-1, 1]
        self.action_space = spaces.Box(low=-1, high=1, shape=(6,), dtype=np.float32)

        super().__init__()

    def seed(self, seed=None):
        return [np.random.seed(seed)]

    def step(self, action):
        self.robot.act2(action, max_vel=.8, max_force=.8)
        for _ in range(FRAME_SKIP):
            self.robot.step()

        reward, done, dist = self._getReward()
        obs = self._get_obs()
        return obs, reward, done, {"distance": dist}

    def get_collision(self, bodyB):
        outA = p.getContactPoints(
            bodyA=self.robot.id, bodyB=bodyB, linkIndexA=14, linkIndexB=0)
        outB = p.getContactPoints(
            bodyA=self.robot.id, bodyB=bodyB, linkIndexA=13, linkIndexB=0)
        return len(outA) + len(outB)

    def _getReward(self):
        done = False

        reward = self.cube.dbo.query()
        distance = reward.copy()
        collision = self.get_collision(self.cube.cube)

        reward *= -1  # the reward is the inverse distance

        if distance < GOAL_REACHED_DISTANCE or (self.touchy and collision > 0):
            # this is a bit arbitrary, but works well
            done = True
            reward = 1

        return reward, done, distance

    def reset(self, forced=False):
        self.episodes += 1
        if (self.episodes >= RESTART_EVERY_N_EPISODES or forced
                or not self.is_initialized):
            if self.cube is not None:
                self.cube.cleanup()
            self.robot.hard_reset()  # this always has to go first
            self.cube = Cube(self.robot.id, self.cube_spawn)
            self.cube.reset()
            self.episodes = 0
            self.is_initialized = True
        else:
            self.cube.reset()

        qpos = self.robot.rest_pos.copy()
        qpos += np.random.uniform(low=-0.2, high=0.2, size=6)

        qposvel = np.zeros(12, dtype=np.float32)
        qposvel[:6] = qpos

        for _ in range(20):  # to stabilize cube at lower framerates
            self.robot.step()

        self.robot.set(qposvel)
        self.robot.act2(qposvel[:6])
        self.robot.step()

        return self._get_obs()

    def _get_obs(self):
        obs = np.hstack([self.robot.observe(), self.cube.normalize_cube()])
        img = self.cam.snap()
        img = (img * 255).astype(np.uint8)
        return img, obs

    def render(self, mode='human', close=False):
        if mode == "human":
            img = self.cam.snap()
            if self.plot is None:
                plt.ion()
                self.plot_container = plt.imshow(
                    img, interpolation='none', animated=True, label="live feed")
                self.plot = plt.gca()

            self.plot_container.set_data(img)
            self.plot.plot([0])
            plt.pause(0.001)
        else:
            return self._get_obs()[0]

    def close(self):
        self.robot.close()

    def _get_state(self):
        return self.robot.observe()
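# --- Usage sketch (added illustration, not part of the original source). A minimal
# random-agent loop driving ErgoGripperEnv directly, assuming the class and its
# dependencies (SingleRobot, Cube, Cam, pybullet as p, matplotlib, gym.spaces) are
# importable from this module. reset()/step() return (image, state) tuples as
# produced by _get_obs() above.
if __name__ == '__main__':
    env = ErgoGripperEnv(headless=True, cube_spawn="linear")
    img, state = env.reset()
    for step_idx in range(200):
        action = env.action_space.sample()  # random 6-dim joint command in [-1, 1]
        (img, state), reward, done, info = env.step(action)
        if done:
            img, state = env.reset()
    env.close()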
import numpy as np
import matplotlib.pyplot as plt
from gym_ergojr.sim.single_robot import SingleRobot
from arguments import get_args
import os

robot = SingleRobot(debug=False)  # 6DOF Reacher Robot
args = get_args()

file_path = os.getcwd() + '/data/freq{}/{}'.format(args.freq, args.approach)
if not os.path.isdir(file_path):
    os.makedirs(file_path)

np.random.seed(seed=225)

total_steps = 10000 * 100
rest_interval = 10 * 100
freq = args.freq
count = 0
steps_until_resample = 100 / freq

sim_trajectories = np.zeros((total_steps, 12))
actions = np.zeros((total_steps, 6))
bad_actions = np.zeros((total_steps))

robot.reset()
robot.step()
end_pos = []

for epi in range(total_steps):
    if epi % rest_interval == 0:  # Take rest after every 10 * 100 steps
class ErgoReacherEnv(gym.Env):

    def __init__(self,
                 headless=False,
                 simple=False,
                 backlash=False,
                 max_force=1,
                 max_vel=18,
                 goal_halfsphere=False,
                 multi_goal=False,
                 goals=3,
                 gripper=False):
        self.simple = simple
        self.backlash = backlash
        self.max_force = max_force
        self.max_vel = max_vel
        self.multigoal = multi_goal
        self.n_goals = goals
        self.gripper = gripper

        self.goals_done = 0
        self.is_initialized = False
        self.robot = SingleRobot(debug=not headless, backlash=backlash)
        self.ball = Ball()
        self.rhis = RandomPointInHalfSphere(
            0.0,
            0.0369,
            0.0437,
            radius=RADIUS,
            height=0.2610,
            min_dist=0.1,
            halfsphere=goal_halfsphere)
        self.goal = None
        self.dist = DistanceBetweenObjects(
            bodyA=self.robot.id, bodyB=self.ball.id, linkA=13, linkB=1)
        self.episodes = 0  # used for resetting the sim every so often
        self.restart_every_n_episodes = 1000

        self.metadata = {'render.modes': ['human']}

        if not simple and not gripper:  # default
            # observation = 6 joints + 6 velocities + 3 coordinates for target
            self.observation_space = spaces.Box(
                low=-1, high=1, shape=(6 + 6 + 3,), dtype=np.float32)
            # action = 6 joint angles
            self.action_space = spaces.Box(
                low=-1, high=1, shape=(6,), dtype=np.float32)
        elif not gripper:  # simple
            # observation = 4 joints + 4 velocities + 2 coordinates for target
            self.observation_space = spaces.Box(
                low=-1, high=1, shape=(4 + 4 + 2,), dtype=np.float32)
            # action = 4 joint angles
            self.action_space = spaces.Box(
                low=-1, high=1, shape=(4,), dtype=np.float32)
        else:  # gripper
            # observation = 3 joints + 3 velocities + 2 coordinates for target
            self.observation_space = spaces.Box(
                low=-1, high=1, shape=(3 + 3 + 2,), dtype=np.float32)
            # action = 3 joint angles, [-,1,2,-,4,-]
            self.action_space = spaces.Box(
                low=-1, high=1, shape=(3,), dtype=np.float32)

        super().__init__()

    def seed(self, seed=None):
        return [np.random.seed(seed)]

    def step(self, action):
        if self.simple or self.gripper:
            action_ = np.zeros(6, np.float32)
            if self.simple:
                action_[[1, 2, 4, 5]] = action
            if self.gripper:
                action_[[1, 2, 4]] = action
            action = action_

        self.robot.act2(action, max_force=self.max_force, max_vel=self.max_vel)
        self.robot.step()

        reward, done, dist = self._getReward()
        obs = self._get_obs()
        return obs, reward, done, {"distance": dist}

    def _getReward(self):
        done = False

        reward = self.dist.query()
        distance = reward.copy()

        if not self.multigoal:  # this is the normal mode
            reward *= -1  # the reward is the inverse distance
            if reward > GOAL_REACHED_DISTANCE:
                # this is a bit arbitrary, but works well
                self.goals_done += 1
                done = True
                reward = 1
        else:
            if -reward > GOAL_REACHED_DISTANCE:
                self.goals_done += 1
                if self.goals_done == self.n_goals:
                    done = True
                else:
                    robot_state = self._get_obs()[:8]
                    self.move_ball()
                    self._set_state(robot_state)  # move robot back after ball has moved
                    self.robot.step()
                    reward = self.dist.query()

            reward = (self.goals_done * DIA + (DIA - reward)) / (self.n_goals * DIA)

            if done:
                reward = 1

        if self.gripper:
            reward *= 10

            if self.goals_done == RESET_EVERY:
                self.goals_done = 0
                self.reset(True)
                done = False

        # normalize - [-1,1] range:
        # reward = reward * 2 - 1

        return reward, done, distance

    def _setDist(self):
        self.dist.bodyA = self.robot.id
        self.dist.bodyB = self.ball.id

    def move_ball(self):
        if self.simple or self.gripper:
            self.goal = self.rhis.sampleSimplePoint()
        else:
            self.goal = self.rhis.samplePoint()

        self.dist.goal = self.goal
        self.ball.changePos(self.goal, 4)

        for _ in range(20):
            self.robot.step()  # we need this to move the ball

    def reset(self, forced=False):
        self.goals_done = 0
        self.episodes += 1
        if self.episodes >= self.restart_every_n_episodes:
            self.robot.hard_reset()  # this always has to go first
            self.ball.hard_reset()
            self._setDist()
            self.episodes = 0

        if self.is_initialized:
            robot_state = self._get_state()

        self.move_ball()

        if self.gripper and self.is_initialized:
            self._set_state(robot_state[:6])  # move robot back after ball has moved
            self.robot.step()

        if forced or not self.gripper:
            qpos = np.random.uniform(low=-0.2, high=0.2, size=6)
            if self.simple:
                qpos[[0, 3]] = 0

            self.robot.reset()
            self.robot.set(np.hstack((qpos, [0] * 6)))
            self.robot.act2(np.hstack((qpos)))
            self.robot.step()

        self.is_initialized = True

        return self._get_obs()

    def _get_obs(self):
        obs = np.hstack([self.robot.observe(), self.rhis.normalize(self.goal)])
        if self.simple:
            obs = obs[[1, 2, 4, 5, 7, 8, 10, 11, 13, 14]]
        if self.gripper:
            obs = obs[[1, 2, 4, 7, 8, 10, 13, 14]]
        return obs

    def render(self, mode='human', close=False):
        pass

    def close(self):
        self.robot.close()

    def _get_state(self):
        return self.robot.observe()

    def _set_state(self, posvel):
        if self.simple or self.gripper:
            new_state = np.zeros((12), dtype=np.float32)
            if self.simple:
                new_state[[1, 2, 4, 5, 7, 8, 10, 11]] = posvel
            if self.gripper:
                new_state[[1, 2, 4, 7, 8, 10]] = posvel
        else:
            new_state = np.array(posvel)
        self.robot.set(new_state)
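# --- Usage sketch (added illustration, not part of the original source). A minimal
# random-agent loop for ErgoReacherEnv in its "simple" 4-DOF configuration, assuming
# the class and its dependencies (SingleRobot, Ball, RandomPointInHalfSphere,
# DistanceBetweenObjects, gym.spaces) are importable from this module. The info dict
# carries the tip-to-goal distance computed in _getReward().
if __name__ == '__main__':
    env = ErgoReacherEnv(headless=True, simple=True)
    obs = env.reset()
    for step_idx in range(300):
        action = env.action_space.sample()  # 4 joint targets in [-1, 1]
        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()
    env.close()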
    volatile=True)


def data_to_var_nosim(real_t1, action):
    return Variable(
        double_unsqueeze(
            torch.cat([
                torch.from_numpy(real_t1).float(),
                torch.from_numpy(action).float()
            ], dim=0)),
        volatile=True)


#### SIM

robot = SingleRobot(debug=False)
robot.set(ds.current_real[epi, 0])
robot.act2(ds.current_real[epi, 0, :6])
robot.step()

for frame in range(299):
    robot.act2(ds.action[epi, frame])
    robot.step()
    joints_sim[frame, :] = robot.observe()[:6]
    # time.sleep(0.1)

robot.close()

### REAL_SIM

for frame in range(299):
    joints_real[frame, :] = ds.current_real[epi, frame, :6]
import time

from pypot.robot import from_remote
import numpy as np

from gym_ergojr.sim.single_robot import SingleRobot

robot_sim = SingleRobot(debug=True)
robot_real = from_remote('flogo2.local', 4242)

poses = [
    [45, 0, 0, 0, 0, 0],
    [0, 45, 0, 0, 0, 0],
    [0, 0, 45, 0, 0, 0],
    [0, 0, 0, 45, 0, 0],
    [0, 0, 0, 0, 45, 0],
    [0, 0, 0, 0, 0, 45],
]

for pose in poses:
    pose_sim = np.deg2rad(pose + [0] * 6) / (np.pi / 2)
    robot_sim.set(pose_sim)
    robot_sim.act2(pose_sim[:6])
    # robot_sim.step()
    for i in range(100):
        robot_sim.step()
    print(robot_sim.observe().round(1))

    for i, m in enumerate(robot_real.motors):
        m.goal_position = pose[i]

    time.sleep(2)
import time

from gym_ergojr.sim.single_robot import SingleRobot

robot = SingleRobot(debug=True)
robot.reset()
robot.step()

for i in range(100):
    # robot.act2([0, -1, 0, 0, -1, 0])    # min y: -0.1436
    # robot.act2([0, 1, -1, 0, 0, 0])     # max y: 0.22358
    # robot.act2([0, -.2, 0, 0, -.8, 0])  # max z: 0.25002
    robot.act2([0, -.2, 1, 0, 0, 0.5])    # min z: 0.016000
    robot.step()
    # cut off x axis, only return y (new x) and z (new y)
    print(robot.get_tip()[0][1:])
    time.sleep(0.1)
class GoalBabbling(object):

    def __init__(self, action_noise, num_retries, task):
        self.noise = action_noise
        self.retries = num_retries
        self.task = task
        if task == 'pusher':
            self.env = gym.make('Nas-Pusher-3dof-Vanilla-v1')
            self.action_len = self.env.action_space.shape[0]
        else:
            self.robot = SingleRobot(debug=False)
            self.action_len = len(self.robot.motor_ids)
        self._nn_set = learners.NNSet()
        np.random.seed(seed=225)
        random.seed(225)

    def nearest_neighbor(self, goal, history):
        """Return the motor command of the nearest neighbor of the goal."""
        if len(history) < len(self._nn_set):  # HACK
            self._nn_set = learners.NNSet()
        for m_command, effect in history[len(self._nn_set):]:
            self._nn_set.add(m_command, y=effect)
        idx = self._nn_set.nn_y(goal)[1][0]
        return history[idx][0]

    def add_noise(self, nn_command):
        action = np.asarray(nn_command)
        action_noise = np.random.uniform(-self.noise, self.noise, self.action_len)
        new_command = action + action_noise
        if self.task == 'reacher':
            new_command[0], new_command[3] = 0, 0
            new_command = np.clip(new_command, -1, 1)
        new_command = np.clip(new_command, -3, 3)
        return new_command

    def sample_action(self):
        # the reacher task has no gym env attached, so sample its action directly
        if self.task == 'reacher':
            action = np.random.uniform(-1, 1, self.action_len)
            action[0], action[3] = 0, 0
        else:
            action = self.env.action_space.sample()
        return action

    def action_retries(self, goal, history):
        history_local = []
        goal = np.asarray([goal])
        action = self.nearest_neighbor(goal, history)
        for _ in range(self.retries):
            action_noise = self.add_noise(action)
            _, end_position, obs = self.perform_action(action_noise)
            history_local.append((action_noise, end_position))
        action_new = self.nearest_neighbor(goal, history_local)
        return action_new

    def perform_action(self, action):
        if self.task == 'reacher':
            self.robot.act(action)
            self.robot.step()
            end_pos = self.robot.get_tip()
            obs = self.robot.observe()
        else:
            obs, _, _, _ = self.env.step(action)
            end_pos = list(obs[6:8])
        return action, end_pos, obs

    def reset_robot(self):
        if self.task == 'reacher':
            self.robot.reset()
            self.robot.rest()
            self.robot.step()
        else:
            observation = self.env.reset()
            return observation

    @staticmethod
    def dist(a, b):
        return np.linalg.norm(a - b)

    # We don't need this method anymore
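# --- Usage sketch (added illustration, not part of the original source). One possible
# goal-babbling loop on the reacher task: bootstrap a history with random motor
# commands, then pick a previously reached effect as a goal and refine the nearest
# recorded command with noisy retries. The exact shapes of goals/effects depend on
# how get_tip() and the learners.NNSet API are used elsewhere in the project, so this
# is only a sketch under those assumptions.
if __name__ == '__main__':
    babbler = GoalBabbling(action_noise=0.1, num_retries=5, task='reacher')
    babbler.reset_robot()

    history = []
    for _ in range(50):  # random motor babbling to seed the history
        action = babbler.sample_action()
        action, end_pos, obs = babbler.perform_action(action)
        history.append((action, end_pos))

    goal = history[0][1]  # reuse an observed end position as the goal
    refined_action = babbler.action_retries(goal, history)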
class ErgoReacherHeavyEnv(gym.Env):

    def __init__(self,
                 headless=False,
                 simple=False,
                 max_force=1000,
                 max_vel=100,
                 goal_halfsphere=False,
                 backlash=.1,
                 double_goal=False):
        self.simple = simple
        self.max_force = max_force
        self.max_vel = max_vel
        self.double_goal = double_goal

        self.robot = SingleRobot(
            debug=not headless, heavy=True, new_backlash=backlash, silent=True)
        self.ball = Ball(1)
        self.rhis = RandomPointInHalfSphere(
            0.0,
            3.69,
            4.37,
            radius=RADIUS,
            height=26.10,
            min_dist=10.,
            halfsphere=goal_halfsphere)
        self.goal = None
        self.goals_done = 0
        self.goal_dirty = False
        self.dist = DistanceBetweenObjects(
            bodyA=self.robot.id, bodyB=self.ball.id, linkA=19, linkB=1)
        self.episodes = 0  # used for resetting the sim every so often
        self.restart_every_n_episodes = 1000
        self.force_urdf_reload = False

        self.metadata = {'render.modes': ['human']}

        if not simple:
            # observation = 6 joints + 6 velocities + 3 coordinates for target
            self.observation_space = spaces.Box(
                low=-1, high=1, shape=(6 + 6 + 3,), dtype=np.float32)
            # action = 6 joint angles
            self.action_space = spaces.Box(
                low=-1, high=1, shape=(6,), dtype=np.float32)
        else:
            # observation = 4 joints + 4 velocities + 2 coordinates for target
            self.observation_space = spaces.Box(
                low=-1, high=1, shape=(4 + 4 + 2,), dtype=np.float32)
            # action = 4 joint angles
            self.action_space = spaces.Box(
                low=-1, high=1, shape=(4,), dtype=np.float32)

        super().__init__()

    def seed(self, seed=None):
        return [np.random.seed(seed)]

    def step(self, action):
        if self.simple:
            action_ = np.zeros(6, np.float32)
            action_[[1, 2, 4, 5]] = action
            action = action_

        self.robot.act2(action, max_force=self.max_force, max_vel=self.max_vel)
        self.robot.step()
        self.robot.step()
        self.robot.step()

        reward, done = self._getReward()
        obs = self._get_obs()
        return obs, reward, done, {}

    def _getReward(self):
        done = False

        reward = self.dist.query()
        reward *= -1  # the reward is the inverse distance

        if not self.double_goal:  # this is the normal mode
            if reward > -1.6:  # this is a bit arbitrary, but works well
                done = True
                reward = 1
        else:
            if reward > -1.6:
                self.goals_done += 1
                if self.goals_done == MAX_GOALS:
                    done = True
                else:
                    self.move_ball()
                    self.goal_dirty = True

            max_multiplier = (MAX_GOALS - self.goals_done - 1)
            if self.goal_dirty:
                max_multiplier += 1
                self.goal_dirty = False

            # unnormalized:
            reward = reward - (RADIUS * 2 * max_multiplier)

            # normalize to [0,1] range:
            reward = (reward + (RADIUS * 2 * MAX_GOALS)) / (RADIUS * 2 * MAX_GOALS)

            if done:
                reward = 1

            # normalize to [-1,1] range:
            reward = reward * 2 - 1

        return reward, done

    def _setDist(self):
        self.dist.bodyA = self.robot.id
        self.dist.bodyB = self.ball.id

    def update_backlash(self, new_val):
        self.robot.new_backlash = new_val
        self.force_urdf_reload = True
        # and now on the next self.reset() the new modified URDF will be loaded

    def move_ball(self):
        if self.simple:
            self.goal = self.rhis.sampleSimplePoint()
        else:
            self.goal = self.rhis.samplePoint()

        self.dist.goal = self.goal
        self.ball.changePos(self.goal, 4)

        for _ in range(20):
            self.robot.step()  # we need this to move the ball

    def reset(self):
        self.goals_done = 0
        self.goal_dirty = False
        self.episodes += 1
        if self.force_urdf_reload or self.episodes >= self.restart_every_n_episodes:
            self.robot.hard_reset()  # this always has to go first
            self.ball.hard_reset()
            self._setDist()
            self.episodes = 0
            self.force_urdf_reload = False

        self.move_ball()

        qpos = np.random.uniform(low=-0.2, high=0.2, size=6)
        if self.simple:
            qpos[[0, 3]] = 0

        self.robot.reset()
        self.robot.set(np.hstack((qpos, [0] * 6)))
        self.robot.act2(np.hstack((qpos)))
        self.robot.step()

        return self._get_obs()

    def _get_obs(self):
        obs = np.hstack([self.robot.observe(), self.rhis.normalize(self.goal)])
        if self.simple:
            obs = obs[[1, 2, 4, 5, 7, 8, 10, 11, 13, 14]]
        return obs

    def render(self, mode='human', close=False):
        pass

    def close(self):
        self.robot.close()

    def _get_state(self):
        return self.robot.observe()

    def _set_state(self, posvel):
        if self.simple:
            new_state = np.zeros((12), dtype=np.float32)
            new_state[[1, 2, 4, 5, 7, 8, 10, 11]] = posvel
        else:
            new_state = np.array(posvel)
        self.robot.set(new_state)
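# --- Usage sketch (added illustration, not part of the original source). A minimal
# random-agent loop for the heavy reacher variant in "simple" mode, assuming the
# class and its dependencies (SingleRobot, Ball, RandomPointInHalfSphere,
# DistanceBetweenObjects, gym.spaces) are importable from this module.
# update_backlash() only marks the URDF for reload; it takes effect on the next
# reset(), as noted in the method above.
if __name__ == '__main__':
    env = ErgoReacherHeavyEnv(headless=True, simple=True, backlash=.1)
    obs = env.reset()
    for step_idx in range(300):
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()
    env.update_backlash(.2)  # applied when the sim is rebuilt on the next reset
    obs = env.reset()
    env.close()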
import time

from gym_ergojr.sim.single_robot import SingleRobot

robot = SingleRobot(debug=True)

actions = [[-1, 0, 0, -1, 0, -1], [1, 0, 0, 1, 0, .2], [0, 0, 0, 0, 0, 0]]

for action in actions:
    robot.act2(action)  # don't need to call this at every step
    for i in range(100):
        robot.step()
        print(robot.observe().round(2))
        time.sleep(.01)
    time.sleep(1)
    print("switch")
import numpy as np

from gym_ergojr.sim.single_robot import SingleRobot
from s2rr.movements.dataset import DatasetProduction
from itertools import cycle

cycol = cycle('bgrcmk')

ds = DatasetProduction()
ds.load("~/data/sim2real/data-realigned-v3-{}.npz".format("train"))

epi = np.random.randint(0, len(ds.current_real))

joints_sim = np.zeros((299, 6), np.float32)
joints_real_sim = np.zeros((299, 6), np.float32)
joints_real = np.zeros((299, 6), np.float32)

robot_real = SingleRobot(debug=True)
for frame in range(299):
    robot_real.set(ds.current_real[epi, frame])
    # robot_real.act2(ds.current_real[epi, frame, :6])
    robot_real.step()
    joints_real_sim[frame, :] = robot_real.observe()[:6]
    # time.sleep(0.1)
robot_real.close()

robot_sim = SingleRobot(debug=True)
robot_sim.set(ds.current_real[epi, 0])
robot_sim.act2(ds.current_real[epi, 0, :6])
robot_sim.step()

for frame in range(299):
    robot_sim.act2(ds.action[epi, frame])
    robot_sim.step()
from tkinter import *

from gym_ergojr.sim.single_robot import SingleRobot

master = Tk()
robot = SingleRobot(debug=True)


def getActions():
    return [m.get() for m in motors]


def stepN(N):
    action = getActions()
    print("action", action)
    robot.act2(action)
    for i in range(N):
        robot.step()
    print("tip", robot.get_tip())


def step1():
    stepN(1)


def step10():
    stepN(10)


def step100():
    stepN(100)
import os
from gym_ergojr.sim.single_robot import SingleRobot
import time
import numpy as np
from tqdm import tqdm

for run in ["train", "test"]:
    robot = SingleRobot(debug=True, robot_model="ergojr-penholder")

    ds = np.load(
        os.path.expanduser("~/data/sim2real/data-realigned-{}.npz".format(run)))
    ds_curr_real = ds["ds_curr_real"]
    ds_next_real = ds["ds_next_real"]
    ds_action = ds["ds_action"]
    ds_epi = ds["ds_epi"]
    ds_next_sim = []

    print(ds_curr_real.shape)

    for i in tqdm(range(len(ds_curr_real))):
        robot.set(ds_curr_real[i])
        # print(robot.observe().round(2))
        robot.act2(ds_action[i])
        robot.step()
        obs = robot.observe()
        ds_next_sim.append(obs)
        x = x.cuda()
    return x


def save_model(state):
    torch.save({"state_dict": state}, MODEL_PATH)


loss_function = torch.nn.MSELoss()

exp = Experiment("[sim2real] lstm-realv6")
# exp.param("exp", EXPERIMENT)
# exp.param("layers", LSTM_LAYERS)
# exp.param("nodes", HIDDEN_NODES)

optimizer = torch.optim.Adam(net.parameters())

robot = SingleRobot(debug=False)

for epoch in range(EPOCHS):
    for epi in range(len(ds.current_real)):
        net.zero_hidden()  # !important
        net.hidden[0].detach_()  # !important
        net.hidden[1].detach_()  # !important
        net.zero_grad()
        optimizer.zero_grad()

        robot.set(ds.current_real[epi, 0])
        robot.act2(ds.current_real[epi, 0, :6])
        robot.step()

        losses = Variable(torch.zeros(1))
import time

import numpy as np

from gym_ergojr.sim.single_robot import SingleRobot

robot = SingleRobot(debug=True)

for i in range(20):
    robot.set(np.random.normal(0, 1, 12))
    print(robot.observe().round(2))
    # robot.step()
    time.sleep(.1)

    robot.act2(np.random.normal(0, 1, 6))
    robot.step()
    print(robot.observe().round(2))
    time.sleep(.5)

    print("---")
    time.sleep(1)
    print("switch")