def launch(args):
    # create the ddpg_agent
    # env = gym.make(args.env_name)
    rend = True
    discreteAction = 0
    numControlledJoints = 6
    fixed = False
    actionRepeat = 1
    reward_type = args.reward_type

    if args.env_name.startswith('reach'):
        env = kukaReachGymEnvHer(urdfRoot=robot_data.getDataPath(), actionRepeat=actionRepeat, renders=rend,
                                 useIK=0, isDiscrete=discreteAction, numControlledJoints=numControlledJoints,
                                 fixedPositionObj=fixed, includeVelObs=True, reward_type=reward_type)
    elif args.env_name.startswith('push'):
        env = kukaPushGymEnvHer(urdfRoot=robot_data.getDataPath(), actionRepeat=actionRepeat, renders=rend,
                                useIK=0, isDiscrete=discreteAction, numControlledJoints=numControlledJoints,
                                fixedPositionObj=fixed, includeVelObs=True, reward_type=reward_type)
    elif args.env_name.startswith('obreach'):
        env = kukaReachGymEnvOb(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
                                isDiscrete=discreteAction, numControlledJoints=numControlledJoints,
                                fixedPositionObj=fixed, includeVelObs=True, reward_type=reward_type,
                                obstacles_num=args.obs_num)
    elif args.env_name.startswith('blockreach'):
        env = kukaReachGymEnvblock(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
                                   isDiscrete=discreteAction, numControlledJoints=numControlledJoints,
                                   fixedPositionObj=fixed, includeVelObs=True, reward_type=reward_type)
    else:
        env = kukaReachGymEnvHer(urdfRoot=robot_data.getDataPath(), actionRepeat=actionRepeat, renders=rend,
                                 useIK=0, isDiscrete=discreteAction, numControlledJoints=numControlledJoints,
                                 fixedPositionObj=fixed, includeVelObs=True, reward_type=reward_type)

    # get the environment parameters
    env_params = get_env_params(env, actionRepeat)
    # create the ddpg agent to interact with the environment
    ddpg_trainer = ddpg(args, env, env_params)
    ddpg_trainer.learn()
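# The helper get_env_params() is called above but not shown in this section. Below is a
# minimal sketch of what such a helper plausibly returns for a goal-based Dict-observation
# env like the ones constructed here; the exact keys, the use of env._maxSteps, and the way
# the horizon is scaled by actionRepeat are assumptions, not the repository's implementation.
def get_env_params_sketch(env, actionRepeat=1):
    obs = env.reset()
    return {
        'obs': obs['observation'].shape[0],       # size of the raw observation vector
        'goal': obs['desired_goal'].shape[0],     # size of the goal vector
        'action': env.action_space.shape[0],      # number of controlled joints
        'action_max': float(env.action_space.high[0]),
        'max_timesteps': env._maxSteps // actionRepeat,  # assumed episode horizon
    }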
def __init__(self, urdfRootPath=robot_data.getDataPath(), timeStep=0.01,
             useInverseKinematics=0, arm='l', useOrientation=0):
    self.urdfRootPath = os.path.join(urdfRootPath, "iCub/icub_fixed_model.sdf")
    self.timeStep = timeStep
    self.useInverseKinematics = useInverseKinematics
    self.useOrientation = useOrientation
    self.useSimulation = 1
    self.indices_torso = range(12, 15)
    self.indices_left_arm = range(15, 22)
    self.indices_right_arm = range(25, 32)
    self.indices_head = range(22, 25)
    self.home_pos_torso = [0.0, 0.0, 0.0]  # degrees
    self.home_pos_head = [0.47, 0, 0]
    self.home_left_arm = [-29.4, 40.0, 0, 70, 0, 0, 0]
    self.home_right_arm = [-29.4, 40.0, 0, 70, 0, 0, 0]
    self.workspace_lim = [[0.25, 0.52], [-0.2, 0.2], [0.5, 1.0]]
    self.control_arm = arm if arm == 'r' or arm == 'l' else 'l'  # left arm by default
    self.reset()
def __init__(self, urdfRootPath=robot_data.getDataPath(), timeStep=0.01, useInverseKinematics=0,
             basePosition=[-0.6, -0.4, 0.625], useFixedBase=True, action_space=6,
             includeVelObs=True, init_joints=[0, -0.5, 1.5, 0, 0, 0]):
    # load the robot arm model
    self.urdfRootPath = os.path.join(urdfRootPath, "kuka_kr6_support/urdf/kr6r700sixx.urdf")
    self.timeStep = timeStep
    self.useInverseKinematics = useInverseKinematics
    self.useNullSpace = 0
    self.useOrientation = 1
    self.useSimulation = 1
    self.basePosition = basePosition
    self.useFixedBase = useFixedBase
    # range in which target positions are generated
    self.workspace_lim = [[0.3, 0.60], [-0.3, 0.3], [0, 1]]
    self.workspace_lim_endEff = [[0.1, 0.70], [-0.4, 0.4], [0.65, 1]]
    self.endEffLink = 7
    self.action_space = action_space
    self.includeVelObs = includeVelObs
    # degrees of freedom of the arm
    self.numJoints = 6
    self.init_joints = init_joints
    self.reset()
def __init__(self, urdfRoot=robot_data.getDataPath(), useIK=0, isDiscrete=0, actionRepeat=1,
             renders=False, maxSteps=1000, numControlledJoints=6, fixedPositionObj=False,
             includeVelObs=True, reward_type=1):
    super().__init__(urdfRoot, useIK, isDiscrete, actionRepeat, renders, maxSteps,
                     numControlledJoints, fixedPositionObj, includeVelObs)
    self.reward_type = reward_type
    self.reset()
    observation_dim = len(self._observation['observation'])
    self.observation_space = spaces.Dict({
        'observation': spaces.Box(-largeValObservation, largeValObservation,
                                  shape=(observation_dim,), dtype=np.float32),
        'achieved_goal': spaces.Box(-largeValObservation, largeValObservation,
                                    shape=(3,), dtype=np.float32),
        'desired_goal': spaces.Box(-largeValObservation, largeValObservation,
                                   shape=(3,), dtype=np.float32)
    })
    if self._isDiscrete:
        self.action_space = spaces.Discrete(self._kuka.getActionDimension())
    else:
        self._action_bound = 1
        action_high = np.array([self._action_bound] * self.action_dim)
        self.action_space = spaces.Box(-action_high, action_high, dtype='float32')
    self.viewer = None
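# Goal-conditioned envs with the Dict observation space above are expected to expose a
# compute_reward(achieved_goal, desired_goal, info) method so HER can relabel goals.
# The sketch below shows the usual dense/sparse variants; the reward_type convention
# (1 = dense) and the reuse of the _target_dist_min threshold are assumptions.
def compute_reward_sketch(self, achieved_goal, desired_goal, info=None):
    d = np.linalg.norm(np.array(achieved_goal) - np.array(desired_goal), axis=-1)
    if self.reward_type == 1:
        return -d                                              # dense: negative distance to goal
    return -(d > self._target_dist_min).astype(np.float32)    # sparse: 0 on success, -1 otherwise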
def main(argv):
    numControlledJoints = 6
    fixed = False
    normalize_observations = False
    gamma = 0.9
    batch_size = 16
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 1000000
    policy_name = "reaching_policy"
    discreteAction = 0
    rend = False

    kukaenv = kukaReachGymEnvHer(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
                                 isDiscrete=discreteAction, numControlledJoints=numControlledJoints,
                                 fixedPositionObj=fixed, includeVelObs=True)
    kukaenv = Monitor(kukaenv, log_dir, allow_early_resets=True)

    n_actions = kukaenv.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))

    model_class = DDPG
    goal_selection_strategy = 'future'
    model = HER(CustomPolicy, kukaenv, model_class, n_sampled_goal=4,
                goal_selection_strategy=goal_selection_strategy, verbose=1,
                tensorboard_log="../pybullet_logs/kuka_reach_ddpg/reaching_DDPG_HER_PHASE",
                buffer_size=1000000, batch_size=64,
                random_exploration=0.3, action_noise=action_noise)

    print(colored("-----Timesteps:", "red"))
    print(colored(timesteps, "red"))
    print(colored("-----Number Joints Controlled:", "red"))
    print(colored(numControlledJoints, "red"))
    print(colored("-----Object Position Fixed:", "red"))
    print(colored(fixed, "red"))
    print(colored("-----Policy Name:", "red"))
    print(colored(policy_name, "red"))
    print(colored("------", "red"))
    print(colored("Launch the script with -h for further info", "red"))

    model.learn(total_timesteps=timesteps, log_interval=100, callback=callback)

    print("Saving model to kuka.pkl")
    model.save("../pybullet_logs/kukareach_ddpg_her/" + policy_name)

    del model  # remove to demonstrate saving and loading
def launch(args):
    # create the ddpg_agent
    # env = gym.make(args.env_name)
    rend = False
    discreteAction = 0
    numControlledJoints = 9
    actionRepeat = 1
    fixed = False
    maxStep = 1000
    # env = kukaReachGymEnvHer(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0, isDiscrete=discreteAction,
    #                          numControlledJoints=numControlledJoints, fixedPositionObj=fixed, includeVelObs=True)
    env = kukaPickGymEnvHer(urdfRoot=robot_data.getDataPath(), maxSteps=maxStep, renders=rend, useIK=0,
                            isDiscrete=discreteAction, actionRepeat=actionRepeat,
                            numControlledJoints=numControlledJoints, fixedPositionObj=fixed,
                            includeVelObs=True, reward_type=1)
    # get the environment parameters
    env_params = get_env_params(env)
    # create the ddpg agent to interact with the environment
    args.replay_strategy = 'normal'
    ddpg_trainer = ddpg(args, env, env_params)
    ddpg_trainer.learn()
def __init__(self, urdfRootPath=robot_data.getDataPath(), timeStep=0.01, useInverseKinematics=0,
             basePosition=[-0.6, -0.4, 0.625], useFixedBase=True, action_space=7, includeVelObs=True):
    self.urdfRootPath = os.path.join(urdfRootPath, "franka_description/robots/panda_arm_physics.urdf")
    self.timeStep = timeStep
    self.useInverseKinematics = useInverseKinematics
    self.useNullSpace = 0
    self.useOrientation = 1
    self.useSimulation = 1
    self.basePosition = basePosition
    self.useFixedBase = useFixedBase
    self.workspace_lim = [[0.3, 0.60], [-0.3, 0.3], [0, 1]]
    self.workspace_lim_endEff = [[0.1, 0.70], [-0.4, 0.4], [0.65, 1]]
    self.endEffLink = 8
    self.action_space = action_space
    self.includeVelObs = includeVelObs
    self.numJoints = 7
    self.reset()
def main():
    # Open GUI and set pybullet_data in the path
    p.connect(p.GUI)
    # p.setAdditionalSearchPath(pybullet_data.getDataPath())

    # Load plane contained in pybullet_data
    p.loadURDF(os.path.join(pybullet_data.getDataPath(), "plane.urdf"))

    # Set gravity for simulation
    p.setGravity(0, 0, -9.8)

    # load robot model
    robotIds = p.loadSDF(os.path.join(robot_data.getDataPath(), "iCub/icub_fixed_model.sdf"))
    icubId = robotIds[0]

    # set constraint between base_link and world
    p.createConstraint(icubId, -1, -1, -1, p.JOINT_FIXED, [0, 0, 0], [0, 0, 0],
                       [p.getBasePositionAndOrientation(icubId)[0][0],
                        p.getBasePositionAndOrientation(icubId)[0][1],
                        p.getBasePositionAndOrientation(icubId)[0][2] * 1.2],
                       p.getBasePositionAndOrientation(icubId)[1])

    # init_pos for standing
    # without FT_sensors
    init_pos = [0]*15 + [-29.4, 28.8, 0, 44.59, 0, 0, 0, 0.47, 0, 0, -29.4, 30.4, 0, 44.59, 0, 0, 0]
    # with FT_sensors
    # init_pos = [0]*19 + [-29.4, 28.8, 0, 0, 44.59, 0, 0, 0, 0.47, 0, 0, -29.4, 30.4, 0, 0, 44.59, 0, 0, 0]
    # all set to zero
    # init_pos = [0]*p.getNumJoints(icubId)

    # Load other objects
    p.loadURDF(os.path.join(pybullet_data.getDataPath(), "table/table.urdf"), [1.1, 0.0, 0.0])
    p.loadURDF(os.path.join(pybullet_data.getDataPath(), "lego/lego.urdf"), [0.5, 0.0, 0.8])

    # add debug sliders
    jointIds = []
    paramIds = []
    joints_num = p.getNumJoints(icubId)

    print("len init_pos ", len(init_pos))
    print("len num joints", joints_num)

    for j in range(joints_num):
        info = p.getJointInfo(icubId, j)
        jointName = info[1]
        # jointType = info[2]
        jointIds.append(j)
        paramIds.append(p.addUserDebugParameter(jointName.decode("utf-8"),
                                                info[8], info[9],
                                                init_pos[j] / 180 * m.pi))

    while True:
        for i in range(joints_num):
            p.setJointMotorControl2(icubId, i, p.POSITION_CONTROL,
                                    targetPosition=p.readUserDebugParameter(i),
                                    targetVelocity=0.0, positionGain=0.25,
                                    velocityGain=0.75, force=50)
        p.stepSimulation()
        time.sleep(0.01)
def __init__(self, urdfRoot=robot_data.getDataPath(), useIK=0, isDiscrete=0, actionRepeat=1,
             renders=False, maxSteps=1000, dist_delta=0.03, numControlledJoints=6,
             fixedPositionObj=False, includeVelObs=True, reward_type=1):
    super().__init__(urdfRoot, useIK, isDiscrete, actionRepeat, renders, maxSteps,
                     dist_delta, numControlledJoints, fixedPositionObj, includeVelObs)
    self.reward_type = reward_type
    self.reset()
    observationDim = len(self._observation)
    observation_high = np.array([largeValObservation] * observationDim)
    self.observation_space = spaces.Box(-observation_high, observation_high, dtype='float32')
    if self._isDiscrete:
        self.action_space = spaces.Discrete(self._kuka.getActionDimension())
    else:
        # self.action_dim = 2  # self._kuka.getActionDimension()
        self._action_bound = 1
        action_high = np.array([self._action_bound] * self.action_dim)
        self.action_space = spaces.Box(-action_high, action_high, dtype='float32')
    self.viewer = None
def __init__(self, urdfRoot=robot_data.getDataPath(), actionRepeat=4, useIK=1, isDiscrete=0,
             control_arm='l', useOrientation=0, rnd_obj_pose=1, renders=False,
             maxSteps=2000, reward_type=1):
    self._control_arm = control_arm
    self._isDiscrete = isDiscrete
    self._timeStep = 1. / 240.
    self._useIK = 1 if self._isDiscrete else useIK
    self._useOrientation = useOrientation
    self._urdfRoot = urdfRoot
    self._actionRepeat = actionRepeat
    self._observation = []
    self._envStepCounter = 0
    self._renders = renders
    self._maxSteps = maxSteps
    self.terminated = 0
    self._cam_dist = 1.3
    self._cam_yaw = 180
    self._cam_pitch = -40
    self._h_table = []
    self._target_dist_min = 0.03
    self._rnd_obj_pose = rnd_obj_pose
    self._reward_type = reward_type

    # Initialize PyBullet simulator
    self._p = p
    if self._renders:
        self._cid = p.connect(p.SHARED_MEMORY)
        if self._cid < 0:
            self._cid = p.connect(p.GUI)
        p.resetDebugVisualizerCamera(2.5, 90, -60, [0.0, -0.0, -0.0])
    else:
        self._cid = p.connect(p.DIRECT)

    # initialize simulation environment
    self.seed()
    self.reset()

    observationDim = len(self._observation)
    observation_high = np.array([largeValObservation] * observationDim)
    self.observation_space = spaces.Box(-observation_high, observation_high, dtype='float32')

    if self._isDiscrete:
        self.action_space = spaces.Discrete(13) if self._icub.useOrientation else spaces.Discrete(7)
    else:
        action_dim = self._icub.getActionDimension()
        self._action_bound = 1
        action_high = np.array([self._action_bound] * action_dim)
        self.action_space = spaces.Box(-action_high, action_high, dtype='float32')

    self.viewer = None
def main():
    model_class = DDPG  # works also with SAC and TD3

    # -j
    action_space = 7
    # -p
    fixed = True
    # -o
    normalize_observations = False
    # -g
    gamma = 0.9
    # -b
    # batch_size = 16
    # -m
    memory_limit = 1000000
    # -r
    normalize_returns = True
    # -t
    timesteps = 1000000

    policy_name = "pushing_policy"
    discreteAction = 0
    rend = False

    env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
                                 isDiscrete=discreteAction, action_space=action_space,
                                 fixedPositionObj=fixed, includeVelObs=True)

    # Available strategies (cf paper): future, final, episode, random
    goal_selection_strategy = 'future'  # equivalent to GoalSelectionStrategy.FUTURE

    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))

    # Wrap the model
    model = HER(CustomPolicy, env, model_class, n_sampled_goal=4,
                goal_selection_strategy=goal_selection_strategy, verbose=1,
                tensorboard_log="../pybullet_logs/panda_push_ddpg/stable_baselines/DDPG+HER_FIXED_DYN_RAND",
                buffer_size=1000000, batch_size=256,
                random_exploration=0.3, action_noise=action_noise)

    # Train the model
    model.learn(timesteps)
    print("Saving Policy")
    model.save("../policies/pushing_fixed_HER_Dyn_Rand")
def main(argv):
    # -p
    fixed = False
    # -j
    numControlledJoints = 7
    # -n
    policy_name = "pushing_policy"

    # COMMAND LINE PARAMS MANAGEMENT:
    try:
        opts, args = getopt.getopt(argv, "hj:p:n:", ["j=", "p=", "n="])
    except getopt.GetoptError:
        print('test.py -j <numJoints> -p <fixedPoseObject> -n <policy_name>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('------------------ Default values:')
            print('test.py -j <numJoints: 7> -p <fixedPoseObject: False> -n <policy_name: "pushing_policy">')
            print('------------------')
            return 0
        elif opt in ("-j", "--j"):
            if int(arg) > 7:
                print("Check dimension state")
                return 0
            numControlledJoints = int(arg)
        elif opt in ("-p", "--p"):
            fixed = bool(arg)
        elif opt in ("-n", "--n"):
            policy_name = str(arg)

    print(colored("-----Number Joints Controlled:", "red"))
    print(colored(numControlledJoints, "red"))
    print(colored("-----Object Position Fixed:", "red"))
    print(colored(fixed, "red"))
    print(colored("-----Policy Name:", "red"))
    print(colored(policy_name, "red"))
    print(colored("------", "red"))
    print(colored("Launch the script with -h for further info", "red"))

    model = DDPG.load(policy_name)
    pandaenv = pandaPushGymEnv(urdfRoot=robot_data.getDataPath(), renders=True, useIK=0,
                               numControlledJoints=numControlledJoints,
                               fixedPositionObj=fixed, includeVelObs=True)
    obs = pandaenv.reset()
    while True:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = pandaenv.step(action)
def main(): p.connect(p.GUI) p.loadURDF(os.path.join(pybullet_data.getDataPath(), "plane.urdf")) # Set gravity for simulation p.setGravity(0, 0, -9.8) # load robot model cube_start_pos = [0, 0, 1] robot_id = p.loadURDF(os.path.join(robot_data.getDataPath(), "darwinop/darwinOP.urdf"), cube_start_pos, globalScaling=2) grav_id = p.addUserDebugParameter("gravity", -10, 10, -10) joint_ids = [] param_ids = [] for joint in range(p.getNumJoints(robot_id)): joint_info = p.getJointInfo(robot_id, joint) print('{num} | {name}'.format(num=joint_info[0], name=joint_info[1].decode('utf-8'))) joint_name = joint_info[1] joint_type = joint_info[2] joint_lower_limit = joint_info[8] joint_upper_limit = joint_info[9] if joint_type == p.JOINT_PRISMATIC or joint_type == p.JOINT_REVOLUTE: joint_ids.append(joint) param_ids.append( p.addUserDebugParameter(joint_name.decode("utf-8"), joint_lower_limit, joint_upper_limit, 0)) p.setRealTimeSimulation(1) while True: time.sleep(0.01) p.setGravity(0, 0, p.readUserDebugParameter(grav_id)) for i in range(len(param_ids)): config = param_ids[i] target_pos = p.readUserDebugParameter(config) p.setJointMotorControl2(robot_id, joint_ids[i], p.POSITION_CONTROL, target_pos, force=5 * 240.)
def __init__(self, urdfRoot=robot_data.getDataPath(), useIK=0, isDiscrete=0, actionRepeat=1,
             renders=False, maxSteps=1000, numControlledJoints=6, fixedPositionObj=False,
             includeVelObs=True, init_joints=[0, -0.5, 1.5, 0, 0, 0]):
    self.action_dim = numControlledJoints
    self._isDiscrete = isDiscrete
    self._timeStep = 1. / 240.  # 240 Hz simulation step
    self._useIK = useIK
    self._urdfRoot = urdfRoot
    self._actionRepeat = actionRepeat
    self._observation = []
    self._envStepCounter = 0
    self._renders = renders
    self._maxSteps = maxSteps
    self.terminated = False
    self._cam_dist = 1.3
    self._cam_yaw = 180
    self._cam_pitch = -40
    self._h_table = []
    self._target_dist_min = 0.1  # distance threshold used to decide task success
    self._p = p
    self.fixedPositionObj = fixedPositionObj
    self.includeVelObs = includeVelObs
    self.ws_lim = [[0.3, 0.5], [-0.2, 0.2], [0, 1]]
    self.init_joints = init_joints

    if self._renders:
        cid = p.connect(p.SHARED_MEMORY)
        if cid < 0:
            cid = p.connect(p.GUI)
        p.resetDebugVisualizerCamera(2.5, 90, -60, [0.52, -0.2, -0.33])
    else:
        p.connect(p.DIRECT)
def main(load_policy=False):
    global log_dir, log_dir_policy
    if load_policy:
        log_dir_policy = '../policies/PUSHING_TD3+HER_FIXED_POSITION_DYN_RAND_FROM_FIXED_PHYSICS'
    model_class = TD3  # works also with SAC and DDPG
    action_space = 7
    fixed = True
    normalize_observations = False
    gamma = 0.9
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 1500000
    discreteAction = 0
    rend = False

    env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
                                 isDiscrete=discreteAction, action_space=action_space,
                                 fixedPositionObj=fixed, includeVelObs=True)
    env = Monitor(env, log_dir, allow_early_resets=True)

    # Available strategies (cf paper): future, final, episode, random
    goal_selection_strategy = 'future'  # equivalent to GoalSelectionStrategy.FUTURE

    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))

    # Wrap the model
    model = HER(CustomPolicy, env, model_class, n_sampled_goal=4,
                goal_selection_strategy=goal_selection_strategy, verbose=1,
                tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3+HER_FIXED_DYN_RAND",
                buffer_size=1000000, batch_size=256,
                random_exploration=0.3, action_noise=action_noise)

    if load_policy:
        model = HER.load("../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl",
                         env=env, n_sampled_goal=4,
                         goal_selection_strategy=goal_selection_strategy,
                         tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3+HER_FIXED_DYN_RAND_FROM_FIXED_PHYSICS",
                         buffer_size=1000000, batch_size=256,
                         random_exploration=0.3, action_noise=action_noise)

    # Train the model, starting from a previous policy when load_policy is True
    model.learn(timesteps, callback=callback)
    model.save("../policies/PUSHING_FIXED_TD3_DYN_RAND")
    print("Finished train1")
def main(load_policy=False):
    global log_dir
    model_class = TD3  # works also with SAC and DDPG
    action_space = 6
    fixed = True
    # 0 completely fixed, 1 slightly random radius, 2 big random radius
    object_position = 1
    normalize_observations = False
    gamma = 0.9
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 5000000
    discreteAction = 0
    rend = False

    env = pandaPushGymEnvHER(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=1,
                             isDiscrete=discreteAction, action_space=action_space,
                             fixedPositionObj=fixed, includeVelObs=True,
                             object_position=object_position)
    env = Monitor(env, log_dir, allow_early_resets=True)

    goal_selection_strategy = 'future'
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))

    # Wrap the model
    model = HER(CustomTD3Policy, env, model_class, n_sampled_goal=4,
                goal_selection_strategy=goal_selection_strategy, verbose=1,
                tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/PUSHING_TD3+HER_FIXED_POSITION_PHASE_1_IK",
                buffer_size=1000000, batch_size=256,
                random_exploration=0.3, action_noise=action_noise)

    if load_policy:
        model = HER.load("../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl",
                         env=env, n_sampled_goal=4,
                         goal_selection_strategy=goal_selection_strategy,
                         tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/PUSHING_TD3+HER_FIXED_POSITION_PHASE_1_IK",
                         buffer_size=1000000, batch_size=256,
                         random_exploration=0.3, action_noise=action_noise)

    model.learn(timesteps, log_interval=100, callback=callback)
    print("Saving Policy PHASE_1")
    model.save("../policies/PUSHING_TD3+HER_FIXED_POSITION_PHASE_1_IK")
# Load plane contained in pybullet_data
planeId = p.loadURDF("plane.urdf")

# Set gravity for simulation
p.setGravity(0, 0, -9.8)

dir_path = os.path.dirname(os.path.realpath(__file__))
for root, dirs, files in os.walk(os.path.dirname(dir_path)):
    for file in files:
        if file.endswith('.urdf'):
            print(root)
            p.setAdditionalSearchPath(root)

kukaId = p.loadURDF(os.path.join(robot_data.getDataPath(), "kuka_kr6_support/urdf/kr6r700sixx.urdf"))
# kukaId = p.loadURDF(os.path.join(robot_data.getDataPath(), "kuka_kr6_support/urdf/kuka_model.urdf"))

# set constraint between base_link and world
cid = p.createConstraint(kukaId, -1, -1, -1, p.JOINT_FIXED, [0, 0, 0], [0, 0, 0],
                         [p.getBasePositionAndOrientation(kukaId)[0][0],
                          p.getBasePositionAndOrientation(kukaId)[0][1],
                          p.getBasePositionAndOrientation(kukaId)[0][2] * 1.2],
                         p.getBasePositionAndOrientation(kukaId)[1])

numJoints = p.getNumJoints(kukaId)
print(numJoints)

for i in range(numJoints):
args = get_args()

# load the model param
model_path = args.save_dir + args.env_name + '/model.pt'
print('model path:', model_path)
o_mean, o_std, g_mean, g_std, model = torch.load(model_path, map_location=lambda storage, loc: storage)

# create the environment
# env = gym.make(args.env_name)
rend = True
discreteAction = 0
numControlledJoints = 6
fixed = False
actionRepeat = 1
reward_type = 1

if args.env_name.startswith('reach'):
    env = kukaReachGymEnvHer(urdfRoot=robot_data.getDataPath(), maxSteps=int(1e10),
                             actionRepeat=actionRepeat, renders=rend, useIK=0,
                             isDiscrete=discreteAction, numControlledJoints=numControlledJoints,
                             fixedPositionObj=fixed, includeVelObs=True, reward_type=reward_type)
elif args.env_name.startswith('push'):
    env = kukaPushGymEnvHer(urdfRoot=robot_data.getDataPath(), actionRepeat=actionRepeat,
                            renders=rend, useIK=0, isDiscrete=discreteAction,
# Open GUI and set pybullet_data in the path
p.connect(p.GUI)
p.setAdditionalSearchPath(pybullet_data.getDataPath())

# Load plane contained in pybullet_data
planeId = p.loadURDF("plane.urdf")

# Set gravity for simulation
p.setGravity(0, 0, -9.8)

dir_path = os.path.dirname(os.path.realpath(__file__))
for root, dirs, files in os.walk(os.path.dirname(dir_path)):
    for file in files:
        if file.endswith('.urdf'):
            print(root)
            p.setAdditionalSearchPath(root)

pandaId = p.loadURDF(os.path.join(robot_data.getDataPath(),
                                  "franka_description/robots/panda_arm_physics.urdf"))

# set constraint between base_link and world
cid = p.createConstraint(pandaId, -1, -1, -1, p.JOINT_FIXED, [0, 0, 0], [0, 0, 0],
                         [p.getBasePositionAndOrientation(pandaId)[0][0],
                          p.getBasePositionAndOrientation(pandaId)[0][1],
                          p.getBasePositionAndOrientation(pandaId)[0][2] * 1.2],
                         p.getBasePositionAndOrientation(pandaId)[1])

while True:
    p.stepSimulation()
    time.sleep(0.01)
def __init__(self, urdfRoot=robot_data.getDataPath(), useIK=0, isDiscrete=0, actionRepeat=1,
             renders=False, maxSteps=1000, dist_delta=0.03, numControlledJoints=7,
             fixedPositionObj=True, includeVelObs=True):
    self.action_dim = numControlledJoints
    self._isDiscrete = isDiscrete
    self._timeStep = 1. / 240.
    self._useIK = useIK
    self._urdfRoot = urdfRoot
    self._actionRepeat = actionRepeat
    self._observation = []
    self._envStepCounter = 0
    self._renders = renders
    self._maxSteps = maxSteps
    self.terminated = False
    self._cam_dist = 1.3
    self._cam_yaw = 180
    self._cam_pitch = -40
    self._h_table = []
    self._target_dist_max = 0.3
    self._target_dist_min = 0.2
    self._p = p
    self.fixedPositionObj = fixedPositionObj
    self.includeVelObs = includeVelObs

    if self._renders:
        cid = p.connect(p.SHARED_MEMORY)
        if cid < 0:
            cid = p.connect(p.GUI)
        p.resetDebugVisualizerCamera(2.5, 90, -60, [0.52, -0.2, -0.33])
    else:
        p.connect(p.DIRECT)

    # self.seed()
    # initialize simulation environment
    self.reset()

    observationDim = len(self._observation)
    observation_high = np.array([largeValObservation] * observationDim)
    self.observation_space = spaces.Box(-observation_high, observation_high, dtype='float32')

    if self._isDiscrete:
        self.action_space = spaces.Discrete(self._panda.getActionDimension())
    else:
        # self.action_dim = 2  # self._panda.getActionDimension()
        self._action_bound = 1
        action_high = np.array([self._action_bound] * self.action_dim)
        self.action_space = spaces.Box(-action_high, action_high, dtype='float32')

    self.viewer = None
import os, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
print(currentdir)
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)

import pybullet as p
import pybullet_data
import robot_data
import time
import math as m

print('robot_data.getDataPath()', robot_data.getDataPath())


def main():
    p.connect(p.GUI)
    p.loadURDF(os.path.join(pybullet_data.getDataPath(), "plane.urdf"))

    # Set gravity for simulation
    p.setGravity(0, 0, -9.8)

    # load robot model
    cube_start_pos = [0, 0, 1]
    robot_id = p.loadURDF(os.path.join(robot_data.getDataPath(), "darwinop/darwinOP.urdf"),
                          cube_start_pos, globalScaling=2)
def __init__(self, urdfRoot=robot_data.getDataPath(), useIK=0, isDiscrete=0, actionRepeat=1,
             renders=False, maxSteps=1000, dist_delta=0.03, action_space=7, fixedPositionObj=True,
             includeVelObs=True, object_position=0, test_phase=False, alg='ddpg',
             max_episode_steps=1000, type_physics=0):
    self.object_position = object_position
    self.action_dim = action_space
    self._isDiscrete = isDiscrete
    self._param_lambda = 1 / np.random.uniform(125, 1000)
    self._timeStep = (1.0 / 240.0) + np.random.exponential(self._param_lambda)
    self._useIK = useIK
    self._urdfRoot = urdfRoot
    self._actionRepeat = actionRepeat
    self._observation = []
    self._envStepCounter = 0
    self._renders = renders
    self._maxSteps = maxSteps
    self._cam_dist = 1.3
    self._cam_yaw = 180
    self._cam_pitch = -40
    self._h_table = []
    self._target_dist_max = 0.3
    self._target_dist_min = 0.1
    self._p = p
    self.fixedPositionObj = fixedPositionObj
    self.includeVelObs = includeVelObs
    self.test_phase = test_phase
    self.alg = alg
    self.max_episode_steps = max_episode_steps
    self.type_physics = type_physics

    if self._renders:
        cid = p.connect(p.SHARED_MEMORY)
        if cid < 0:
            cid = p.connect(p.GUI)
        p.resetDebugVisualizerCamera(2.5, 90, -60, [0.52, -0.2, -0.33])
    else:
        p.connect(p.DIRECT)

    # self.seed()
    # initialize simulation environment
    self._observation = self.reset()
    observation_dim = len(self._observation['observation'])

    self.observation_space = spaces.Dict({
        'observation': spaces.Box(-largeValObservation, largeValObservation,
                                  shape=(observation_dim,), dtype=np.float32),
        # the achieved goal is the position reached with the object in space
        'achieved_goal': spaces.Box(-largeValObservation, largeValObservation,
                                    shape=(3,), dtype=np.float32),
        # the desired goal is the desired position in space
        'desired_goal': spaces.Box(-largeValObservation, largeValObservation,
                                   shape=(3,), dtype=np.float32)
    })
    print(self.observation_space)

    if self._isDiscrete:
        self.action_space = spaces.Discrete(self._panda.getActionDimension())
    else:
        # self.action_dim = 2  # self._panda.getActionDimension()
        self._action_bound = 1
        action_high = np.array([self._action_bound] * self.action_dim)
        self.action_space = spaces.Box(-action_high, action_high, dtype=np.float32)

    self.viewer = None
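# The timestep randomization above draws lambda so that 1/lambda ~ U(125, 1000) and then adds
# an Exponential(1/lambda) delay to the nominal 240 Hz step; the mean extra delay per draw is
# therefore between 1 ms and 8 ms. The snippet below just reproduces that sampling to make the
# resulting range visible; it is illustrative only and not part of the repository.
import numpy as np

def sample_randomized_timestep(rng=np.random):
    param_lambda = 1 / rng.uniform(125, 1000)            # mean extra delay in seconds
    return (1.0 / 240.0) + rng.exponential(param_lambda)

print(np.mean([sample_randomized_timestep() for _ in range(10000)]))  # typically ~0.0065 s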
def main(argv):
    # -j
    numControlledJoints = 7
    # -p
    fixed = False
    # -o
    normalize_observations = False
    # -g
    gamma = 0.9
    # -b
    batch_size = 128
    # -m
    memory_limit = 1000000
    # -r
    normalize_returns = True
    # -t
    timesteps = 10000000
    policy_name = "pushing_policy"

    # COMMAND LINE PARAMS MANAGEMENT:
    try:
        opts, args = getopt.getopt(argv, "hj:p:g:b:m:r:o:t:n:",
                                   ["j=", "p=", "g=", "b=", "m=", "r=", "o=", "t=", "n="])
    except getopt.GetoptError:
        print('train.py -t <timesteps> -j <numJoints> -p <fixedPoseObject> -n <policy_name> '
              '-g <gamma> -b <batchsize> -m <memory_limit> -r <norm_ret> -o <norm_obs>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('------------------ Default values:')
            print('train.py -t <timesteps: 10000000> -j <numJoints: 7> -p <fixedPoseObject: False> '
                  '-n <policy_name: "pushing_policy"> -g <gamma: 0.9> -b <batch_size: 128> '
                  '-m <memory_limit: 1000000> -r <norm_ret: True> -o <norm_obs: False>')
            print('------------------')
            return 0
        elif opt in ("-j", "--j"):
            if int(arg) > 7:
                print("check dim state")
                return 0
            numControlledJoints = int(arg)
        elif opt in ("-p", "--p"):
            fixed = bool(arg)
        elif opt in ("-g", "--g"):
            gamma = float(arg)
        elif opt in ("-o", "--o"):
            normalize_observations = bool(arg)
        elif opt in ("-b", "--b"):
            batch_size = int(arg)
        elif opt in ("-m", "--m"):
            memory_limit = int(arg)
        elif opt in ("-r", "--r"):
            normalize_returns = bool(arg)
        elif opt in ("-t", "--t"):
            timesteps = int(arg)
        elif opt in ("-n", "--n"):
            policy_name = str(arg)

    discreteAction = 0
    rend = False
    pandaenv = pandaPushGymEnv(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
                               isDiscrete=discreteAction, numControlledJoints=numControlledJoints,
                               fixedPositionObj=fixed, includeVelObs=True)

    n_actions = pandaenv.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))

    pandaenv = DummyVecEnv([lambda: pandaenv])

    model = DDPG(LnMlpPolicy, pandaenv, normalize_observations=normalize_observations,
                 gamma=gamma, batch_size=batch_size, memory_limit=memory_limit,
                 normalize_returns=normalize_returns, verbose=1,
                 param_noise=param_noise, action_noise=action_noise,
                 tensorboard_log="../pybullet_logs/pandareach_ddpg/", reward_scale=1)

    print(colored("-----Timesteps:", "red"))
    print(colored(timesteps, "red"))
    print(colored("-----Number Joints Controlled:", "red"))
    print(colored(numControlledJoints, "red"))
    print(colored("-----Object Position Fixed:", "red"))
    print(colored(fixed, "red"))
    print(colored("-----Policy Name:", "red"))
    print(colored(policy_name, "red"))
    print(colored("------", "red"))
    print(colored("Launch the script with -h for further info", "red"))

    model.learn(total_timesteps=timesteps)
    print("Saving model to panda.pkl")
    model.save("../pybullet_logs/pandareach_ddpg/policies" + policy_name)

    del model  # remove to demonstrate saving and loading
normalize_observations = False
# -g
gamma = 0.9
# -b
batch_size = 16
# -m
memory_limit = 1000000
# -r
normalize_returns = True
# -t
timesteps = 10000000
policy_name = "pushing_policy"
discreteAction = 0
rend = True

env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
                             isDiscrete=discreteAction, numControlledJoints=numControlledJoints,
                             fixedPositionObj=fixed, includeVelObs=True)

# Load the trained model
model = HER.load("../policies/pushing_fixed_HER_Dyn_Rand0.pkl", env=env)

obs = env.reset()

for i in range(10000):
    action, _ = model.predict(obs)
    obs, reward, done, _ = env.step(action)
normalize_observations = False
# -g
gamma = 0.9
# -b
batch_size = 16
# -m
memory_limit = 1000000
# -r
normalize_returns = True
# -t
timesteps = 1000000
policy_name = "pushing_policy"
discreteAction = 0
rend = True

env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
                             isDiscrete=discreteAction, action_space=action_space,
                             fixedPositionObj=fixed, includeVelObs=True, object_position=0,
                             test_phase=True, alg='td3_normal_policy_to_different_physics',
                             type_physics=2, max_episode_steps=500)

goal_selection_strategy = 'future'  # equivalent to GoalSelectionStrategy.FUTURE

# Load the trained model
model = HER.load("../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl", env=env)

obs = env.reset()

for _ in range(10000):
    action, _ = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    if done:
        obs = env.reset()
def main(argv):
    numControlledJoints = 6
    fixed = False
    normalize_observations = False
    gamma = 0.9
    batch_size = 16
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 1000000
    policy_name = "reaching_policy"

    # COMMAND LINE PARAMS MANAGEMENT:
    discreteAction = 0
    # rend = True
    rend = False

    kukaenv = kukaReachGymEnv(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
                              isDiscrete=discreteAction, numControlledJoints=numControlledJoints,
                              fixedPositionObj=fixed, includeVelObs=True)

    n_actions = kukaenv.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))

    kukaenv = DummyVecEnv([lambda: kukaenv])
    full_log = True

    model = DDPG(LnMlpPolicy, kukaenv, normalize_observations=normalize_observations,
                 gamma=gamma, batch_size=batch_size, memory_limit=memory_limit,
                 normalize_returns=normalize_returns, verbose=1,
                 param_noise=param_noise, action_noise=action_noise,
                 tensorboard_log="../pybullet_logs/kuka_reach_ddpg/reaching_obstacle_DDPG_PHASE_1",
                 full_tensorboard_log=full_log, reward_scale=1)

    print(colored("-----Timesteps:", "red"))
    print(colored(timesteps, "red"))
    print(colored("-----Number Joints Controlled:", "red"))
    print(colored(numControlledJoints, "red"))
    print(colored("-----Object Position Fixed:", "red"))
    print(colored(fixed, "red"))
    print(colored("-----Policy Name:", "red"))
    print(colored(policy_name, "red"))
    print(colored("------", "red"))
    print(colored("Launch the script with -h for further info", "red"))

    model.learn(total_timesteps=timesteps)
    print("Saving model to kuka.pkl")
    model.save("../pybullet_logs/kukareach_obstacle_ddpg/" + policy_name)

    del model  # remove to demonstrate saving and loading
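# A minimal sketch of how the policy saved above could be loaded back and run in a rendered
# environment, mirroring the test scripts earlier in this section. The policy path simply
# reuses the save path from main(); assuming no renaming happens in between.
def test_saved_policy(policy_path="../pybullet_logs/kukareach_obstacle_ddpg/reaching_policy"):
    model = DDPG.load(policy_path)
    env = kukaReachGymEnv(urdfRoot=robot_data.getDataPath(), renders=True, useIK=0,
                          isDiscrete=0, numControlledJoints=6,
                          fixedPositionObj=False, includeVelObs=True)
    obs = env.reset()
    while True:
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()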