def __init__(self, gym, sim, EnvClass, TaskClass, num_envs, spacing,
             max_episode_steps, sim_params):
    self._num_envs = num_envs
    self._num_actions = TaskClass.num_actions()
    self._num_obs = TaskClass.num_observations()
    self._gym = gym
    self._sim = sim
    self._gym.set_sim_params(self._sim, sim_params)
    self._max_episode_steps = max_episode_steps

    # acquire data to be shared by all environment instances
    shared_data = EnvClass.create_shared_data(self._gym, self._sim)
    # self.viewer = gym.create_viewer(sim, gymapi.DEFAULT_VIEWER_WIDTH, gymapi.DEFAULT_VIEWER_HEIGHT)

    # env bounds
    lower = gymapi.Vec3(-spacing, 0.0, -spacing)
    upper = gymapi.Vec3(spacing, spacing, spacing)

    # create environment and task instances
    self.envs = []
    self.tasks = []
    num_per_row = int(sqrt(num_envs))
    for i in range(num_envs):
        # create env instance
        env_ptr = self._gym.create_env(self._sim, lower, upper, num_per_row)
        env_base_args = {
            "gym": self._gym,
            "env": env_ptr,
            "env_index": i,
        }
        env = EnvClass(shared_data, **env_base_args)
        self.envs.append(env)

        # create task instance
        task_base_args = {"gym": self._gym, "sim": self._sim}
        task = TaskClass(env, **task_base_args)
        self.tasks.append(task)

    # expose reward computation to the training algorithm
    # (all tasks share the same compute_reward, so the last instance suffices)
    self.compute_reward = task.compute_reward

    # allocate buffers shared by all envs
    self.observations = np.zeros((self._num_envs, self._num_obs), dtype=np.float32)
    self.actions = np.zeros((self._num_envs, self.num_actions()), dtype=np.float32)
    self.rewards = np.zeros((self._num_envs,), dtype=np.float32)
    self.kills = np.zeros((self._num_envs,), dtype=np.bool_)
    self.info = np.zeros((self._num_envs,), dtype=np.bool_)

    # workaround to match the original Gym interface
    self.action_space = types.SimpleNamespace()
    self.action_space.sample = self.sample_action
    self.action_space.shape = [self.num_actions()]
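# Hedged usage sketch (not part of the original file): how a training loop
# might drive this vectorized Experiment. The (obs, rewards, dones, info)
# return signature of step() is an assumption inferred from the caller in
# main() further down; gym, sim, and sim_params are assumed to be created
# as in the test setUp().
def _example_experiment_loop(gym, sim, sim_params):
    num_envs = 4
    exp = rlbase.Experiment(gym, sim, FrankaEnv, FrankaPush, num_envs,
                            1.5, 50, sim_params)
    exp.reset()
    for _ in range(50):
        # one random action per env, shaped to match the exposed action_space
        actions = np.random.uniform(-1.0, 1.0,
                                    size=(num_envs, exp.action_space.shape[0]))
        obs, rewards, dones, info = exp.step(actions)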
def get_random_xz_pos(self):
    # sample a pose at fixed height with randomized x/z position
    pose = gymapi.Transform()
    randx = random.uniform(0.5, 0.6)
    randz = random.uniform(-0.15, 0.15)
    pose.p = gymapi.Vec3(randx, 0.35, randz)
    # identity rotation (w = 1); an all-zero quaternion is not a valid rotation
    pose.r = gymapi.Quat(0.0, 0.0, 0.0, 1.0)
    return pose, randx, randz
def setUp(self):
    try:
        # initialize gym
        self.gym = gymapi.acquire_gym()
        if not self.gym.initialize():
            self.fail("Failed to initialize GYM")
        self.sim = self.gym.create_sim(0, 0)

        # simulation params
        sim_params = gymapi.SimParams()
        sim_params.solver_type = 5
        sim_params.num_outer_iterations = 4
        sim_params.num_inner_iterations = 10
        sim_params.relaxation = 0.75
        sim_params.warm_start = 0.4
        sim_params.gravity = gymapi.Vec3(0.0, -9.8, 0.0)
        self.gym.set_sim_params(self.sim, sim_params)

        # create experiment
        self.num_envs = FLAGS.num_envs
        env_spacing = FLAGS.env_spacing
        self.max_episode_steps = FLAGS.max_episode_steps
        self.push_exp = rlbase.Experiment(self.gym, self.sim, FrankaEnv, FrankaPush,
                                          self.num_envs, env_spacing,
                                          self.max_episode_steps, sim_params)

        if FLAGS.enable_viewer:
            # create viewer
            self.viewer = self.gym.create_viewer(self.sim, gymapi.DEFAULT_VIEWER_WIDTH,
                                                 gymapi.DEFAULT_VIEWER_HEIGHT)
            if self.viewer is None:
                self.fail("Failed to create viewer")
            # set viewer viewpoint
            self.gym.viewer_camera_look_at(self.viewer, self.push_exp.envs[0]._env,
                                           gymapi.Vec3(0.5, 1.3, -2.0),
                                           gymapi.Vec3(0.8, 0.0, 2.0))
    except Exception as e:
        self.fail("Error initializing simulation: {}".format(e))
def __init__(self, gym, sim, EnvClass, TaskClass, num_envs, spacing,
             viewer=None, env_param=None, task_param=None, asset_path=None):
    self._first_reset = True
    self._num_envs = num_envs
    self._gym = gym
    self._sim = sim

    # acquire data to be shared by all environment instances
    if asset_path is not None:
        shared_data = EnvClass.create_shared_data(self._gym, self._sim, data_dir=asset_path)
    else:
        shared_data = EnvClass.create_shared_data(self._gym, self._sim)

    # env bounds
    lower = gymapi.Vec3(-spacing, 0.0, -spacing)
    upper = gymapi.Vec3(spacing, spacing, spacing)

    if viewer is not None:
        self._viewer = viewer

    # create environment and task instances
    self.envs = []
    if env_param is None:
        env_param = dict()
    if task_param is None:
        task_param = dict()
    num_per_row = int(sqrt(num_envs))
    for i in range(num_envs):
        # create env instance
        env_ptr = self._gym.create_env(self._sim, lower, upper, num_per_row)
        env_base_args = {
            "gym": self._gym,
            "env": env_ptr,
            "env_index": i,
        }
        env = EnvClass(shared_data, viewer=viewer, **env_param, **env_base_args)
        self.envs.append(env)

    # a single task instance operates on all envs
    task_base_args = {"gym": self._gym}
    self._task = TaskClass(self.envs, **task_param, **task_base_args)

    # all envs share the same action size; this is a safe assumption as it is
    # required for vectorized training of any env
    self._num_actions = self.envs[0].num_actions()
    self._num_obs = self._task.num_observations()
    self.observation_space = np.array([self._num_obs, ])
    self.action_space = np.array([self._num_actions, ])
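# Hedged construction sketch for this Experiment variant (illustrative, not
# from the original source): env_param/task_param forward keyword overrides
# to EnvClass/TaskClass. rev_stiffness and rev_damping are real FrankaEnv
# parameters (see its __init__ below); the values and the assumption that
# this variant is exported as rlbase.Experiment are mine.
def _example_experiment_with_params(gym, sim, viewer=None):
    exp = rlbase.Experiment(gym, sim, FrankaEnv, FrankaPush, num_envs=4,
                            spacing=1.5, viewer=viewer,
                            env_param={"rev_stiffness": 4.0, "rev_damping": 3.0},
                            task_param={})
    # observation/action sizes are exposed as 1-element arrays
    return exp.observation_space[0], exp.action_space[0]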
def __init__(self, shared_data, join_init=None, rev_stiffness=4.0, rev_damping=3.0,
             prism_stiffness=5.0, prism_damping=0.0, kv=1.0, **base_args):
    super(FrankaEnv, self).__init__(**base_args)
    self.dt = 0.01
    self.kv = kv
    self.initial_dof_pos = join_init or [0.0, 0.0, 0.0, -1.75, 0.0, 2.0, 0.8, 0.0, 0.0]
    self.franka_pose = gymapi.Transform()
    self.shared_data = shared_data

    # add franka - 12 rigid body indices
    self.franka_pose.p = gymapi.Vec3(0.0, 0.0, 0.0)
    self.franka_pose.r = gymapi.Quat(-0.707107, 0.0, 0.0, 0.707107)
    self.create_actor(self.shared_data.franka_asset, self.franka_pose, "franka",
                      self._env_index, 0)

    # for retrieving the Franka actor
    self.franka_handle = self._gym.find_actor_handle(self._env, "franka")

    # for retrieving finger positions
    self.f1_handle = self._gym.find_actor_rigid_body_handle(self._env, self.franka_handle,
                                                            'panda_leftfinger')
    self.f2_handle = self._gym.find_actor_rigid_body_handle(self._env, self.franka_handle,
                                                            'panda_rightfinger')
    self.franka_joint_names = self._gym.get_actor_dof_names(self._env, self.franka_handle)

    self.body_map = {}
    self.joint_map = {}
    self.initial_transforms_map = {}
    self.prev_joint_positions = {}
    self.joint_velocities = {}
    # print("franka joints:")
    # for jn in self.franka_joint_names:
    #     self.joint_map[jn] = self.get_joint_handle("franka", jn)
    #     self.prev_joint_positions[jn] = self.get_joint_position(self.joint_map[jn])
    #     self.joint_velocities[jn] = 0.0
    #     # print(jn, " = ", self.prev_joint_positions[jn])

    # override default joint stiffness and damping values for PD control
    self.franka_dof_props = self._setup_joint_props(rev_stiffness, rev_damping,
                                                    prism_stiffness, prism_damping)
    self.step_counter = 0
    self.reset()
def main():
    # initialize gym
    gym = gymapi.acquire_gym()
    if not gym.initialize():
        print("*** Failed to initialize gym")
        exit()
    sim = gym.create_sim(0, 0)  # gymapi.SIM_PHYSX

    # create viewer
    viewer = gym.create_viewer(sim, gymapi.DEFAULT_VIEWER_WIDTH, gymapi.DEFAULT_VIEWER_HEIGHT)
    if viewer is None:
        print("*** Failed to create viewer")
        exit()

    # simulation params
    sim_params = gymapi.SimParams()
    sim_params.solver_type = 5
    sim_params.num_outer_iterations = 5
    sim_params.num_inner_iterations = 60
    sim_params.relaxation = 0.75
    sim_params.warm_start = 0.75
    sim_params.shape_collision_margin = 0.01
    sim_params.contact_regularization = 1e-5
    sim_params.deterministic_mode = True
    sim_params.gravity = gymapi.Vec3(0.0, -9.81, 0.0)
    gym.set_sim_params(sim, sim_params)
    dt = 0.01

    # create experiment
    num_envs = 2
    env_spacing = 1.5
    exp = rlbase.Experiment(gym, sim, FrankaEnv, FrankaPush, num_envs, env_spacing, 50, sim_params)

    # set viewer viewpoint
    gym.viewer_camera_look_at(viewer, exp.envs[1]._env,
                              gymapi.Vec3(0.4, 1.3, -1.3), gymapi.Vec3(0.8, 0.0, 2.0))

    # state/step helpers intended for configuring MPPI
    def set_state(s):
        for i in range(num_envs):
            exp.envs[i].set_state(*s)

    def get_state():
        return exp.envs[0].get_state()

    def step(action):
        obs, rews, _, info = exp.step(action)
        return obs, rews

    step_counts = 0
    all_obs = []
    while not gym.query_viewer_has_closed(viewer):
        # record final joint positions and reset every 500 steps
        if step_counts == 500:
            last_obs = []
            for i in range(num_envs):
                last_obs.append(exp.envs[i].get_franka_joint_states()['pos'])
            print(last_obs[0])
            all_obs.append(last_obs)
            step_counts = 0
            exp.reset()
        step_counts += 1

        # random action placeholder (an MPPI-computed action would go here)
        action = np.random.uniform(-1, 1, size=(num_envs, 7))

        # take the action
        obs, rews = step(action)

        # draw some debug visualization
        gym.clear_lines(viewer)
        for i in range(len(exp.envs)):
            env = exp.envs[i]
            task = exp.tasks[i]
            # per-env debug drawing would go here

        gym.step_graphics(sim)
        gym.draw_viewer(viewer, sim, True)

    gym.shutdown()
def prepare_params(kwargs):
    # DDPG params
    ddpg_params = dict()
    env_name = kwargs['env_name']

    if env_name == "FrankaPush-v0":
        carbgym = gymapi.acquire_gym()
        if not carbgym.initialize():
            print("*** Failed to initialize gym")
            quit()
        sim = carbgym.create_sim(0, 0)

        # simulation params
        sim_params = gymapi.SimParams()
        sim_params.solver_type = 5
        sim_params.num_outer_iterations = 5
        sim_params.num_inner_iterations = 60
        sim_params.relaxation = 0.75
        sim_params.warm_start = 0.5
        sim_params.shape_collision_margin = 0.003
        sim_params.contact_regularization = 1e-7
        sim_params.deterministic_mode = True
        sim_params.gravity = gymapi.Vec3(0.0, -9.8, 0.0)

        # create experiment
        num_envs = DEFAULT_PARAMS['rollout_batch_size']
        env_spacing = 2.5

    def make_env():
        if env_name == "FrankaPush-v0":
            # the closure resolves carbgym/sim/num_envs/env_spacing at call time
            env = rlbase.Experiment(carbgym, sim, FrankaEnv, FrankaPush, num_envs,
                                    env_spacing, kwargs['steps'], sim_params)
            return env
        else:
            return gym.make(env_name)

    kwargs['make_env'] = lambda: cached_make_env(make_env, env_name)
    tmp_env = kwargs['make_env']()
    assert hasattr(tmp_env, '_max_episode_steps')
    kwargs['max_episode_steps'] = tmp_env._max_episode_steps
    tmp_env.reset()

    kwargs['max_u'] = np.array(kwargs['max_u']) if isinstance(kwargs['max_u'], list) else kwargs['max_u']
    kwargs['gamma'] = 1. - 1. / kwargs['max_episode_steps']
    if 'lr' in kwargs:
        kwargs['pi_lr'] = kwargs['lr']
        kwargs['Q_lr'] = kwargs['lr']
        del kwargs['lr']

    # move DDPG hyperparameters into their own dict, keeping a '_'-prefixed copy
    for name in ['buffer_size', 'hidden', 'layers', 'network_class', 'polyak',
                 'batch_size', 'Q_lr', 'pi_lr', 'norm_eps', 'norm_clip', 'max_u',
                 'action_l2', 'clip_obs', 'scope', 'relative_goals']:
        ddpg_params[name] = kwargs[name]
        kwargs['_' + name] = kwargs[name]
        del kwargs[name]
    kwargs['ddpg_params'] = ddpg_params
    return kwargs
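# Hedged example call (illustrative): a kwargs dict that prepare_params()
# could consume. It assumes DEFAULT_PARAMS already contains the DDPG keys
# consumed in the loop above (buffer_size, hidden, layers, ...); env_name
# and steps are the only keys added here, with assumed values.
def _example_prepare_params():
    kwargs = dict(DEFAULT_PARAMS)
    kwargs['env_name'] = "FrankaPush-v0"  # selects the carbgym branch above
    kwargs['steps'] = 50                  # episode length for the Experiment
    params = prepare_params(kwargs)
    # after the call, the DDPG hyperparameters live under params['ddpg_params']
    return params['ddpg_params']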
def __init__(self, env, **base_args):
    super(FrankaPush, self).__init__(**base_args)
    self.prev_pos = None
    self.env = env
    self._gym = env._gym
    self.distance_threshold = 0.05
    self.franka_handle = self._gym.find_actor_handle(self.env._env, "franka")

    asset_options = gymapi.AssetImportOptions()
    asset_options.fix_base_link = False
    # asset_options.flip_visual_attachments = True
    asset_options.thickness = 0.004
    asset_options.armature = 0.002

    pose = gymapi.Transform()

    # add table - 1 rigid body index
    pose.p = gymapi.Vec3(0.58, 0.17, 0.0)
    pose.r = gymapi.Quat(-0.707107, 0.0, 0.0, 0.707107)
    self.env.create_actor(env.shared_data.table_asset, pose, "table")

    # add object - 1 rigid body index
    pose, randx, randz = self.get_random_xz_pos()
    self.env.create_actor(env.shared_data.object_asset, pose, "object",
                          self.env._env_index, 1)

    # add target
    # acceptable range x: -0.35 to 0.35
    # acceptable range y: -0.35 to 0.035
    pose, randx, randz = self.get_random_xz_pos()
    pose.p.y = 0.3
    self.env.create_actor(env.shared_data.target_asset, pose, "target",
                          self.env._env_index, 1)
    self.goal = np.array([randx, pose.p.y, randz])

    self.table_handle = self._gym.find_actor_handle(self.env._env, "table")
    self.object_handle = self._gym.find_actor_handle(self.env._env, "object")
    self.target_handle = self._gym.find_actor_handle(self.env._env, "target")

    self.cube_rigid_shape_prop = self._gym.get_actor_rigid_shape_properties(
        self.env._env, self.object_handle)
    self.cube_rigid_body_prop = self._gym.get_actor_rigid_body_properties(
        self.env._env, self.object_handle)
    self.table_rigid_shape_prop = self._gym.get_actor_rigid_shape_properties(
        self.env._env, self.table_handle)
    self.table_rigid_body_prop = self._gym.get_actor_rigid_body_properties(
        self.env._env, self.table_handle)

    self.set_physics("object", "all_friction", 0.3)
    self.set_physics("table", "all_friction", 0.3)
    self.set_physics("object", "mass", 1.0)
    # self.cube_rigid_shape_prop[0].friction = 0.1
    # self.cube_rigid_shape_prop[0].rolling_friction = 0.05
    # self.cube_rigid_shape_prop[0].torsion_friction = 0.05

    self.object_dof_props = self._gym.get_actor_dof_properties(
        self.env._env, self.object_handle)
    self.table_dof_props = self._gym.get_actor_dof_properties(
        self.env._env, self.table_handle)
    # self.object_dof_props['stiffness'].fill(6000)
    # self.object_dof_props['damping'].fill(1000)

    # random bright color for the target; fixed colors for table and object
    c = 0.5 + 0.5 * np.random.random(3)
    color = gymapi.Vec3(c[0], c[1], c[2])
    self._gym.set_rigid_body_color(self.env._env, self.target_handle, 0,
                                   gymapi.MESH_VISUAL_AND_COLLISION, color)
    color = gymapi.Vec3(0.1, 0.1, 0.5)
    self._gym.set_rigid_body_color(self.env._env, self.table_handle, 0,
                                   gymapi.MESH_VISUAL_AND_COLLISION, color)
    color = gymapi.Vec3(1.0, 0.5, 0.1)
    self._gym.set_rigid_body_color(self.env._env, self.object_handle, 0,
                                   gymapi.MESH_VISUAL_AND_COLLISION, color)

    self._gym.set_actor_dof_properties(self.env._env, self.object_handle,
                                       self.object_dof_props)
    self._gym.set_actor_dof_properties(self.env._env, self.table_handle,
                                       self.table_dof_props)
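# Hedged sketch of the sparse, goal-conditioned reward that the
# distance_threshold above suggests (FetchPush-style); the repo's actual
# compute_reward may differ, and this helper is illustrative only.
def _example_sparse_reward(achieved_goal, goal, distance_threshold=0.05):
    # reward is 0 when the object is within the threshold of the goal, -1 otherwise
    d = np.linalg.norm(np.asarray(achieved_goal) - np.asarray(goal), axis=-1)
    return -(d > distance_threshold).astype(np.float32)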