Example No. 1
def launch(args):
    # create the ddpg_agent
    # env = gym.make(args.env_name)
    rend = True
    discreteAction = 0
    numControlledJoints = 6
    fixed = False
    actionRepeat = 1
    reward_type = args.reward_type
    if args.env_name.startswith('reach'):
        env = kukaReachGymEnvHer(urdfRoot=robot_data.getDataPath(),actionRepeat=actionRepeat,renders=rend, useIK=0, isDiscrete=discreteAction,
                                numControlledJoints=numControlledJoints, fixedPositionObj=fixed, includeVelObs=True, reward_type=reward_type)
    elif args.env_name.startswith('push'):
        env = kukaPushGymEnvHer(urdfRoot=robot_data.getDataPath(),actionRepeat=actionRepeat,renders=rend, useIK=0, isDiscrete=discreteAction,
                                numControlledJoints=numControlledJoints, fixedPositionObj=fixed, includeVelObs=True, reward_type=reward_type)
    elif args.env_name.startswith('obreach'):
        env = kukaReachGymEnvOb(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0, isDiscrete=discreteAction,
                                numControlledJoints=numControlledJoints, fixedPositionObj=fixed, includeVelObs=True, 
                                reward_type=reward_type,obstacles_num=args.obs_num)
    elif args.env_name.startswith('blockreach'):
        env = kukaReachGymEnvblock(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0, isDiscrete=discreteAction,
                                numControlledJoints=numControlledJoints, fixedPositionObj=fixed, includeVelObs=True, 
                                reward_type=reward_type)
    else:
        env = kukaReachGymEnvHer(urdfRoot=robot_data.getDataPath(),actionRepeat=actionRepeat,renders=rend, useIK=0, isDiscrete=discreteAction,
                                numControlledJoints=numControlledJoints, fixedPositionObj=fixed, includeVelObs=True, reward_type=reward_type)
                             
    # get the environment parameters
    env_params = get_env_params(env, actionRepeat)
    # create the ddpg agent to interact with the environment
    ddpg_trainer = ddpg(args, env, env_params)
    ddpg_trainer.learn()
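Both this launcher and the one in Example No. 6 call a get_env_params helper that is not shown. A minimal sketch, assuming the goal-based observation dict used by the HER environments; the field names and the division by actionRepeat are assumptions modeled on common HER/DDPG training code, not this repo's actual helper:

def get_env_params(env, actionRepeat=1):
    # Probe the env once to size the networks: observation, goal and action
    # dimensions, the action bound, and the episode horizon.
    obs = env.reset()
    return {'obs': obs['observation'].shape[0],
            'goal': obs['desired_goal'].shape[0],
            'action': env.action_space.shape[0],
            'action_max': float(env.action_space.high[0]),
            'max_timesteps': env._maxSteps // actionRepeat}  # assumption: horizon shrinks with action repeat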
Example No. 2
    def __init__(self,
                 urdfRootPath=robot_data.getDataPath(),
                 timeStep=0.01,
                 useInverseKinematics=0,
                 arm='l',
                 useOrientation=0):

        self.urdfRootPath = os.path.join(urdfRootPath,
                                         "iCub/icub_fixed_model.sdf")
        self.timeStep = timeStep
        self.useInverseKinematics = useInverseKinematics
        self.useOrientation = useOrientation
        self.useSimulation = 1

        self.indices_torso = range(12, 15)
        self.indices_left_arm = range(15, 22)
        self.indices_right_arm = range(25, 32)
        self.indices_head = range(22, 25)

        self.home_pos_torso = [0.0, 0.0, 0.0]  #degrees
        self.home_pos_head = [0.47, 0, 0]

        self.home_left_arm = [-29.4, 40.0, 0, 70, 0, 0, 0]
        self.home_right_arm = [-29.4, 40.0, 0, 70, 0, 0, 0]

        self.workspace_lim = [[0.25, 0.52], [-0.2, 0.2], [0.5, 1.0]]

        self.control_arm = arm if arm == 'r' or arm == 'l' else 'l'  #left arm by default

        self.reset()
Example No. 3
 def __init__(self, 
             urdfRootPath=robot_data.getDataPath(), 
             timeStep=0.01, 
             useInverseKinematics=0, 
             basePosition=[-0.6, -0.4, 0.625], 
             useFixedBase=True, 
             action_space=6, 
             includeVelObs=True,
             init_joints=[0,-0.5,1.5,0,0,0]):
     # load the robot arm model
     self.urdfRootPath = os.path.join(urdfRootPath, "kuka_kr6_support/urdf/kr6r700sixx.urdf")
     self.timeStep = timeStep
     self.useInverseKinematics = useInverseKinematics
     self.useNullSpace = 0
     self.useOrientation = 1
     self.useSimulation = 1
     self.basePosition = basePosition
     self.useFixedBase = useFixedBase
     # sampling range for target positions
     self.workspace_lim = [[0.3, 0.60], [-0.3, 0.3], [0, 1]]
     self.workspace_lim_endEff = [[0.1, 0.70], [-0.4, 0.4], [0.65, 1]]
     self.endEffLink = 7
     self.action_space = action_space
     self.includeVelObs = includeVelObs
     # degrees of freedom of the arm
     self.numJoints = 6
     self.init_joints = init_joints
     self.reset()
Example No. 4
 def __init__(self, urdfRoot=robot_data.getDataPath(),
              useIK=0,
              isDiscrete=0,
              actionRepeat=1,
              renders=False,
              maxSteps=1000,
              numControlledJoints=6,
              fixedPositionObj=False,
              includeVelObs=True,
              reward_type = 1):
     super().__init__(urdfRoot, useIK, isDiscrete, actionRepeat, renders, maxSteps,
                      numControlledJoints, fixedPositionObj, includeVelObs)
     self.reward_type = reward_type
     self.reset()
     observation_dim = len(self._observation['observation'])
     self.observation_space = spaces.Dict({
         'observation': spaces.Box(-largeValObservation, largeValObservation, shape=(observation_dim,), dtype=np.float32),
         'achieved_goal': spaces.Box(-largeValObservation, largeValObservation, shape=(3,), dtype=np.float32),
         'desired_goal': spaces.Box(-largeValObservation, largeValObservation, shape=(3,), dtype=np.float32)
     })
     if (self._isDiscrete):
         self.action_space = spaces.Discrete(
             self._kuka.getActionDimension())
     else:
         self._action_bound = 1
         action_high = np.array([self._action_bound] * self.action_dim)
         self.action_space = spaces.Box(-action_high, action_high, dtype='float32')
     self.viewer = None
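The Dict space above implies the gym GoalEnv interface, whose other required piece, compute_reward, is not shown in this snippet. A sketch of the usual pattern, assuming reward_type toggles dense vs. sparse and reusing the _target_dist_min threshold seen in the sibling classes (the 0/1 mapping is an assumption):

    def compute_reward(self, achieved_goal, desired_goal, info):
        # distance between the achieved and the desired goal position
        d = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
        if self.reward_type == 0:
            return -d  # dense: negative Euclidean distance
        return -(d > self._target_dist_min).astype(np.float32)  # sparse: 0 or -1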
Example No. 5
def main(argv):

    numControlledJoints = 6
    fixed = False
    normalize_observations = False
    gamma = 0.9
    batch_size = 16
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 1000000
    policy_name = "reaching_policy"
    discreteAction = 0
    rend = False

    kukaenv = kukaReachGymEnvHer(urdfRoot=robot_data.getDataPath(),
                                 renders=rend,
                                 useIK=0,
                                 isDiscrete=discreteAction,
                                 numControlledJoints=numControlledJoints,
                                 fixedPositionObj=fixed,
                                 includeVelObs=True)
    kukaenv = Monitor(kukaenv, log_dir, allow_early_resets=True)

    n_actions = kukaenv.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) *
                                                np.ones(n_actions))
    model_class = DDPG
    goal_selection_strategy = 'future'
    model = HER(CustomPolicy,
                kukaenv,
                model_class,
                n_sampled_goal=4,
                goal_selection_strategy=goal_selection_strategy,
                verbose=1,
                tensorboard_log=
                "../pybullet_logs/kuka_reach_ddpg/reaching_DDPG_HER_PHASE",
                buffer_size=1000000,
                batch_size=64,
                random_exploration=0.3,
                action_noise=action_noise)

    print(colored("-----Timesteps:", "red"))
    print(colored(timesteps, "red"))
    print(colored("-----Number Joints Controlled:", "red"))
    print(colored(numControlledJoints, "red"))
    print(colored("-----Object Position Fixed:", "red"))
    print(colored(fixed, "red"))
    print(colored("-----Policy Name:", "red"))
    print(colored(policy_name, "red"))
    print(colored("------", "red"))
    print(colored("Launch the script with -h for further info", "red"))

    model.learn(total_timesteps=timesteps, log_interval=100, callback=callback)

    print("Saving model to kuka.pkl")
    model.save("../pybullet_logs/kukareach_ddpg_her/" + policy_name)

    del model  # remove to demonstrate saving and loading
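The call to model.learn above passes a callback, and Monitor receives a log_dir, both defined outside the snippet. A minimal sketch in the usual stable-baselines v2 style, saving the best model by recent mean episode reward (the variable names are assumptions):

import os
import numpy as np
from stable_baselines.results_plotter import load_results, ts2xy

best_mean_reward = -np.inf

def callback(_locals, _globals):
    global best_mean_reward
    # Monitor writes episode stats into log_dir; track the mean reward
    # over the last 100 episodes and keep the best model so far.
    x, y = ts2xy(load_results(log_dir), 'timesteps')
    if len(x) > 0:
        mean_reward = np.mean(y[-100:])
        if mean_reward > best_mean_reward:
            best_mean_reward = mean_reward
            _locals['self'].save(os.path.join(log_dir, 'best_model.pkl'))
    return True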
Example No. 6
def launch(args):
    # create the ddpg_agent
    # env = gym.make(args.env_name)
    rend = False
    discreteAction = 0
    numControlledJoints = 9
    actionRepeat = 1
    fixed = False
    maxStep = 1000
    # env = kukaReachGymEnvHer(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0, isDiscrete=discreteAction,
    #                          numControlledJoints=numControlledJoints, fixedPositionObj=fixed, includeVelObs=True)
    env = kukaPickGymEnvHer(urdfRoot=robot_data.getDataPath(),
                            maxSteps=maxStep,
                            renders=rend,
                            useIK=0,
                            isDiscrete=discreteAction,
                            actionRepeat=actionRepeat,
                            numControlledJoints=numControlledJoints,
                            fixedPositionObj=fixed,
                            includeVelObs=True,
                            reward_type=1)

    # get the environment parameters
    env_params = get_env_params(env)
    # create the ddpg agent to interact with the environment
    args.replay_strategy = 'normal'
    ddpg_trainer = ddpg(args, env, env_params)
    ddpg_trainer.learn()
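The replay_strategy = 'normal' override effectively switches hindsight relabeling off. A compact sketch of how HER samplers typically branch on this flag, modeled on common implementations (all names below are assumptions, not this repo's code):

import numpy as np

def sample_her_transitions(episode_batch, batch_size, replay_strategy='future', replay_k=4):
    # episode_batch values have shape (n_episodes, T, dim); 'ag' is assumed
    # to hold T+1 achieved goals (one after every step).
    n_ep, T = episode_batch['actions'].shape[:2]
    # under 'normal' no goal is relabeled: plain experience replay
    future_p = 1 - (1. / (1 + replay_k)) if replay_strategy == 'future' else 0

    ep_idx = np.random.randint(0, n_ep, batch_size)
    t_idx = np.random.randint(0, T, batch_size)
    transitions = {k: v[ep_idx, t_idx].copy() for k, v in episode_batch.items()}

    # choose transitions to relabel and a future timestep within each episode
    her_idx = np.where(np.random.uniform(size=batch_size) < future_p)[0]
    future_offset = (np.random.uniform(size=batch_size) * (T - t_idx)).astype(int)
    future_t = (t_idx + 1 + future_offset)[her_idx]

    # replace the desired goal with a goal actually achieved later on
    transitions['g'][her_idx] = episode_batch['ag'][ep_idx[her_idx], future_t]
    return transitions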
Example No. 7
    def __init__(self,
                 urdfRootPath=robot_data.getDataPath(),
                 timeStep=0.01,
                 useInverseKinematics=0,
                 basePosition=[-0.6, -0.4, 0.625],
                 useFixedBase=True,
                 action_space=7,
                 includeVelObs=True):

        self.urdfRootPath = os.path.join(
            urdfRootPath, "franka_description/robots/panda_arm_physics.urdf")
        self.timeStep = timeStep
        self.useInverseKinematics = useInverseKinematics
        self.useNullSpace = 0
        self.useOrientation = 1
        self.useSimulation = 1
        self.basePosition = basePosition
        self.useFixedBase = useFixedBase
        self.workspace_lim = [[0.3, 0.60], [-0.3, 0.3], [0, 1]]
        self.workspace_lim_endEff = [[0.1, 0.70], [-0.4, 0.4], [0.65, 1]]
        self.endEffLink = 8
        self.action_space = action_space
        self.includeVelObs = includeVelObs
        self.numJoints = 7
        self.reset()
Example No. 8
def main():
    # Open GUI and set pybullet_data in the path
    p.connect(p.GUI)
    #p.setAdditionalSearchPath(pybullet_data.getDataPath())

    # Load plane contained in pybullet_data
    p.loadURDF(os.path.join(pybullet_data.getDataPath(),"plane.urdf"))

    # Set gravity for simulation
    p.setGravity(0,0,-9.8)

    # load robot model
    robotIds = p.loadSDF(os.path.join(robot_data.getDataPath(), "iCub/icub_fixed_model.sdf"))
    icubId = robotIds[0]

    # set constraint between base_link and world
    p.createConstraint(icubId, -1, -1, -1, p.JOINT_FIXED, [0, 0, 0], [0, 0, 0],
                       [p.getBasePositionAndOrientation(icubId)[0][0],
                        p.getBasePositionAndOrientation(icubId)[0][1],
                        p.getBasePositionAndOrientation(icubId)[0][2] * 1.2],
                       p.getBasePositionAndOrientation(icubId)[1])

    ##init_pos for standing
    # without FT_sensors
    init_pos = [0]*15 + [-29.4, 28.8, 0, 44.59, 0, 0, 0, 0.47, 0, 0, -29.4, 30.4, 0, 44.59, 0, 0, 0]

    # with FT_sensors
    #init_pos = [0]*19 + [-29.4, 28.8, 0, 0, 44.59, 0, 0, 0, 0.47, 0, 0, -29.4, 30.4, 0, 0, 44.59, 0, 0, 0]

    # all set to zero
    #init_pos = [0]*p.getNumJoints(icubId)

    # Load other objects
    p.loadURDF(os.path.join(pybullet_data.getDataPath(),"table/table.urdf"), [1.1, 0.0, 0.0])
    p.loadURDF(os.path.join(pybullet_data.getDataPath(), "lego/lego.urdf"), [0.5,0.0,0.8])

    # add debug slider
    jointIds=[]
    paramIds=[]
    joints_num = p.getNumJoints(icubId)

    print("len init_pos ",len(init_pos))
    print("len num joints", joints_num)

    for j in range(joints_num):
        info = p.getJointInfo(icubId, j)
        jointName = info[1]
        #jointType = info[2]
        jointIds.append(j)
        paramIds.append(p.addUserDebugParameter(jointName.decode("utf-8"), info[8], info[9], init_pos[j] / 180 * m.pi))


    while True:
        for i in range(joints_num):
            p.setJointMotorControl2(icubId, i, p.POSITION_CONTROL,
                                    targetPosition=p.readUserDebugParameter(i),
                                    targetVelocity=0.0, positionGain=0.25, velocityGain=0.75, force=50)

        p.stepSimulation()
        time.sleep(0.01)
Example No. 9
    def __init__(self, urdfRoot=robot_data.getDataPath(),
                 useIK=0,
                 isDiscrete=0,
                 actionRepeat=1,
                 renders=False,
                 maxSteps=1000,
                 dist_delta=0.03,
                 numControlledJoints=6,
                 fixedPositionObj=False,
                 includeVelObs=True,
                 reward_type = 1):
        super().__init__(urdfRoot, useIK, isDiscrete, actionRepeat, renders, maxSteps,
                         dist_delta, numControlledJoints, fixedPositionObj, includeVelObs)
        self.reward_type = reward_type
        self.reset()
        observationDim = len(self._observation)
        observation_high = np.array([largeValObservation] * observationDim)
        self.observation_space = spaces.Box(-observation_high,
                                            observation_high, dtype='float32')

        if (self._isDiscrete):
            self.action_space = spaces.Discrete(
                self._kuka.getActionDimension())
        else:
            # self.action_dim = 2 #self._kuka.getActionDimension()
            self._action_bound = 1
            action_high = np.array([self._action_bound] * self.action_dim)
            self.action_space = spaces.Box(-action_high,
                                           action_high, dtype='float32')
        self.viewer = None
Example No. 10
    def __init__(self,
                 urdfRoot=robot_data.getDataPath(),
                 actionRepeat=4,
                 useIK=1,
                 isDiscrete=0,
                 control_arm='l',
                 useOrientation=0,
                 rnd_obj_pose=1,
                 renders=False,
                 maxSteps = 2000,
                 reward_type = 1):

        self._control_arm=control_arm
        self._isDiscrete = isDiscrete
        self._timeStep = 1./240.
        self._useIK = 1 if self._isDiscrete else useIK
        self._useOrientation = useOrientation
        self._urdfRoot = urdfRoot
        self._actionRepeat = actionRepeat
        self._observation = []
        self._envStepCounter = 0
        self._renders = renders
        self._maxSteps = maxSteps
        self.terminated = 0
        self._cam_dist = 1.3
        self._cam_yaw = 180
        self._cam_pitch = -40
        self._h_table = []
        self._target_dist_min = 0.03
        self._rnd_obj_pose = rnd_obj_pose
        self._reward_type = reward_type

        # Initialize PyBullet simulator
        self._p = p
        if self._renders:
            self._cid = p.connect(p.SHARED_MEMORY)
            if (self._cid < 0):
                self._cid = p.connect(p.GUI)
            p.resetDebugVisualizerCamera(2.5, 90, -60, [0.0, -0.0, -0.0])
        else:
            self._cid = p.connect(p.DIRECT)

        # initialize simulation environment
        self.seed()
        self.reset()

        observationDim = len(self._observation)
        observation_high = np.array([largeValObservation] * observationDim)
        self.observation_space = spaces.Box(-observation_high, observation_high, dtype='float32')

        if (self._isDiscrete):
            self.action_space = spaces.Discrete(13) if self._icub.useOrientation else spaces.Discrete(7)

        else:
            action_dim = self._icub.getActionDimension()
            self._action_bound = 1
            action_high = np.array([self._action_bound] * action_dim)
            self.action_space = spaces.Box(-action_high, action_high, dtype='float32')

        self.viewer = None
Example No. 11
def main():
    model_class = DDPG  # works also with SAC and TD3

    # -j
    action_space = 7
    # -p
    fixed = True
    # -o
    normalize_observations = False
    # -g
    gamma = 0.9
    # -b
    #batch_size = 16
    # -m
    memory_limit = 1000000
    # -r
    normalize_returns = True
    # -t
    timesteps = 1000000
    policy_name = "pushing_policy"
    discreteAction = 0
    rend = False
    env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(),
                                 renders=rend,
                                 useIK=0,
                                 isDiscrete=discreteAction,
                                 action_space=action_space,
                                 fixedPositionObj=fixed,
                                 includeVelObs=True)

    # Available strategies (cf paper): future, final, episode, random
    goal_selection_strategy = 'future'  # equivalent to GoalSelectionStrategy.FUTURE
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) *
                                                np.ones(n_actions))
    # Wrap the model

    model = HER(
        CustomPolicy,
        env,
        model_class,
        n_sampled_goal=4,
        goal_selection_strategy=goal_selection_strategy,
        verbose=1,
        tensorboard_log=
        "../pybullet_logs/panda_push_ddpg/stable_baselines/DDPG+HER_FIXED_DYN_RAND",
        buffer_size=1000000,
        batch_size=256,
        random_exploration=0.3,
        action_noise=action_noise)

    # Train the model starting from a previous policy
    model.learn(timesteps)
    print("Saving Policy")
    model.save("../policies/pushing_fixed_HER_Dyn_Rand")
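For reference, the 'future' string used above corresponds to the enum form named in the comment; assuming stable-baselines v2, the equivalent spelling is:

from stable_baselines.her import GoalSelectionStrategy

goal_selection_strategy = GoalSelectionStrategy.FUTURE  # same as 'future'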
Example No. 12
def main(argv):
    # -p
    fixed = False
    # -j
    numControlledJoints = 7
    # -n
    policy_name = "pushing_policy"

    # COMMAND LINE PARAMS MANAGEMENT:
    try:
        opts, args = getopt.getopt(argv, "hj:p:n:", ["j=", "p=", "n="])
    except getopt.GetoptError:
        print('test.py -j <numJoints> -p <fixedPoseObject> -n <policy_name>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('------------------ Default values:')
            print(
                'test.py  -j <numJoints: 7> -p <fixedPoseObject: False> -n <policy_name:"pushing_policy"> '
            )
            print('------------------')
            return 0
        elif opt in ("-j", "--j"):
            if (numControlledJoints > 7):
                print("Check dimension state")
                return 0
            else:
                numControlledJoints = int(arg)
        elif opt in ("-p", "--p"):
            fixed = (arg.lower() == 'true')  # bool(arg) is True for any non-empty string
        elif opt in ("-n", "--n"):
            policy_name = str(arg)

    print(colored("-----Number Joints Controlled:", "red"))
    print(colored(numControlledJoints, "red"))
    print(colored("-----Object Position Fixed:", "red"))
    print(colored(fixed, "red"))
    print(colored("-----Policy Name:", "red"))
    print(colored(policy_name, "red"))
    print(colored("------", "red"))
    print(colored("Launch the script with -h for further info", "red"))

    model = DDPG.load(policy_name)

    pandaenv = pandaPushGymEnv(urdfRoot=robot_data.getDataPath(),
                               renders=True,
                               useIK=0,
                               numControlledJoints=numControlledJoints,
                               fixedPositionObj=fixed,
                               includeVelObs=True)
    obs = pandaenv.reset()
    while True:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = pandaenv.step(action)
        if dones:
            obs = pandaenv.reset()
Example No. 13
def main():

    p.connect(p.GUI)
    p.loadURDF(os.path.join(pybullet_data.getDataPath(), "plane.urdf"))

    # Set gravity for simulation
    p.setGravity(0, 0, -9.8)

    # load robot model
    cube_start_pos = [0, 0, 1]
    robot_id = p.loadURDF(os.path.join(robot_data.getDataPath(),
                                       "darwinop/darwinOP.urdf"),
                          cube_start_pos,
                          globalScaling=2)
    grav_id = p.addUserDebugParameter("gravity", -10, 10, -10)

    joint_ids = []
    param_ids = []

    for joint in range(p.getNumJoints(robot_id)):
        joint_info = p.getJointInfo(robot_id, joint)
        print('{num} | {name}'.format(num=joint_info[0],
                                      name=joint_info[1].decode('utf-8')))

        joint_name = joint_info[1]
        joint_type = joint_info[2]
        joint_lower_limit = joint_info[8]
        joint_upper_limit = joint_info[9]
        if joint_type == p.JOINT_PRISMATIC or joint_type == p.JOINT_REVOLUTE:
            joint_ids.append(joint)
            param_ids.append(
                p.addUserDebugParameter(joint_name.decode("utf-8"),
                                        joint_lower_limit, joint_upper_limit,
                                        0))

    p.setRealTimeSimulation(1)
    while True:
        time.sleep(0.01)
        p.setGravity(0, 0, p.readUserDebugParameter(grav_id))
        for i in range(len(param_ids)):
            config = param_ids[i]
            target_pos = p.readUserDebugParameter(config)
            p.setJointMotorControl2(robot_id,
                                    joint_ids[i],
                                    p.POSITION_CONTROL,
                                    target_pos,
                                    force=5 * 240.)
Example No. 14
    def __init__(self,
                 urdfRoot=robot_data.getDataPath(),
                 useIK=0,
                 isDiscrete=0,
                 actionRepeat=1,
                 renders=False,
                 maxSteps=1000,
                 numControlledJoints=6,
                 fixedPositionObj=False,
                 includeVelObs=True,
                 init_joints=[0, -0.5, 1.5, 0, 0, 0]):

        self.action_dim = numControlledJoints
        self._isDiscrete = isDiscrete
        self._timeStep = 1. / 240.  # refresh rate: 240 Hz
        self._useIK = useIK
        self._urdfRoot = urdfRoot
        self._actionRepeat = actionRepeat
        self._observation = []
        self._envStepCounter = 0
        self._renders = renders
        self._maxSteps = maxSteps
        self.terminated = False
        self._cam_dist = 1.3
        self._cam_yaw = 180
        self._cam_pitch = -40
        self._h_table = []
        self._target_dist_min = 0.1  # distance threshold for task success
        self._p = p
        self.fixedPositionObj = fixedPositionObj
        self.includeVelObs = includeVelObs
        self.ws_lim = [[0.3, 0.5], [-0.2, 0.2], [0, 1]]
        self.init_joints = init_joints

        if self._renders:
            cid = p.connect(p.SHARED_MEMORY)
            if (cid < 0):
                cid = p.connect(p.GUI)
            p.resetDebugVisualizerCamera(2.5, 90, -60, [0.52, -0.2, -0.33])
        else:
            p.connect(p.DIRECT)
Example No. 15
def main(load_policy=False):

    global log_dir, log_dir_policy
    if (load_policy):
        log_dir_policy = '../policies/PUSHING_TD3+HER_FIXED_POSITION_DYN_RAND_FROM_FIXED_PHYSICS'
    model_class = TD3  # works also with SAC and DDPG
    action_space = 7
    fixed = True
    normalize_observations = False
    gamma = 0.9
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 1500000
    discreteAction = 0
    rend = False
    env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
                                 isDiscrete=discreteAction, action_space=action_space,
                                 fixedPositionObj=fixed, includeVelObs=True)


    env = Monitor(env, log_dir, allow_early_resets=True)
    # Available strategies (cf paper): future, final, episode, random
    goal_selection_strategy = 'future' # equivalent to GoalSelectionStrategy.FUTURE
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))
    # Wrap the model

    model = HER(CustomPolicy, env, model_class, n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy,
                verbose=1,tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3+HER_FIXED_DYN_RAND", buffer_size=1000000,batch_size=256,
                random_exploration=0.3, action_noise=action_noise)

    if (load_policy):
        model = HER.load("../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl",
                         env=env, n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy,
                         tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3+HER_FIXED_DYN_RAND_FROM_FIXED_PHYSICS",
                         buffer_size=1000000, batch_size=256, random_exploration=0.3, action_noise=action_noise)

    # Train the model starting from a previous policy
    model.learn(timesteps, callback=callback)
    model.save("../policies/PUSHING_FIXED_TD3_DYN_RAND")
    print("Finished train1")
Example No. 16
def main(load_policy=False):
    global log_dir
    model_class = TD3  # works also with SAC and DDPG
    action_space = 6
    fixed = True
    #0 completely fixed, 1 slightly random radius, 2 big random radius,
    object_position = 1
    normalize_observations = False
    gamma = 0.9
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 5000000
    discreteAction = 0
    rend = False

    env = pandaPushGymEnvHER(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=1,
                             isDiscrete=discreteAction, action_space=action_space,
                             fixedPositionObj=fixed, includeVelObs=True, object_position=object_position)

    env = Monitor(env, log_dir, allow_early_resets=True)

    goal_selection_strategy = 'future'
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))
    # Wrap the model

    model = HER(CustomTD3Policy, env, model_class, n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy,
                verbose=1,tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/PUSHING_TD3+HER_FIXED_POSITION_PHASE_1_IK", buffer_size=1000000,batch_size=256,
                random_exploration=0.3, action_noise=action_noise)

    if (load_policy):
        model = HER.load("../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl",
                         env=env, n_sampled_goal=4, goal_selection_strategy=goal_selection_strategy,
                         tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/PUSHING_TD3+HER_FIXED_POSITION_PHASE_1_IK",
                         buffer_size=1000000, batch_size=256, random_exploration=0.3, action_noise=action_noise)

    model.learn(timesteps, log_interval=100, callback=callback)
    print("Saving Policy PHASE_1")
    model.save("../policies/PUSHING_TD3+HER_FIXED_POSITION_PHASE_1_IK")
Example No. 17
# Load plane contained in pybullet_data
planeId = p.loadURDF("plane.urdf")

# Set gravity for simulation
p.setGravity(0, 0, -9.8)

dir_path = os.path.dirname(os.path.realpath(__file__))
for root, dirs, files in os.walk(os.path.dirname(dir_path)):
    for file in files:
        if file.endswith('.urdf'):
            print(root)
            p.setAdditionalSearchPath(root)

kukaId = p.loadURDF(
    os.path.join(robot_data.getDataPath(),
                 "kuka_kr6_support/urdf/kr6r700sixx.urdf"))
# kukaId = p.loadURDF(os.path.join(robot_data.getDataPath(),"kuka_kr6_support/urdf/kuka_model.urdf"))

# set constraint between base_link and world
cid = p.createConstraint(
    kukaId, -1, -1, -1, p.JOINT_FIXED, [0, 0, 0], [0, 0, 0], [
        p.getBasePositionAndOrientation(kukaId)[0][0],
        p.getBasePositionAndOrientation(kukaId)[0][1],
        p.getBasePositionAndOrientation(kukaId)[0][2] * 1.2
    ],
    p.getBasePositionAndOrientation(kukaId)[1])
numJoints = p.getNumJoints(kukaId)
print(numJoints)

for i in range(numJoints):
    # body truncated in the original snippet; a minimal stand-in that
    # inspects each joint so the loop is runnable
    print(p.getJointInfo(kukaId, i))
Example No. 18
 args = get_args()
 # load the model param
 model_path = args.save_dir + args.env_name + '/model.pt'
 print('model path:', model_path)
 o_mean, o_std, g_mean, g_std, model = torch.load(
     model_path, map_location=lambda storage, loc: storage)
 # create the environment
 # env = gym.make(args.env_name)
 rend = True
 discreteAction = 0
 numControlledJoints = 6
 fixed = False
 actionRepeat = 1
 reward_type = 1
 if args.env_name.startswith('reach'):
     env = kukaReachGymEnvHer(urdfRoot=robot_data.getDataPath(),
                              maxSteps=int(1e10),
                              actionRepeat=actionRepeat,
                              renders=rend,
                              useIK=0,
                              isDiscrete=discreteAction,
                              numControlledJoints=numControlledJoints,
                              fixedPositionObj=fixed,
                              includeVelObs=True,
                              reward_type=reward_type)
 elif args.env_name.startswith('push'):
     env = kukaPushGymEnvHer(urdfRoot=robot_data.getDataPath(),
                             actionRepeat=actionRepeat,
                             renders=rend,
                             useIK=0,
                             isDiscrete=discreteAction,
                             numControlledJoints=numControlledJoints,
                             fixedPositionObj=fixed,
                             includeVelObs=True,
                             reward_type=reward_type)
     # the call was cut off in the original; the last four kwargs are assumed
     # to mirror the matching push branch in Example No. 1
Example No. 19
# Open GUI and set pybullet_data in the path
p.connect(p.GUI)
p.setAdditionalSearchPath(pybullet_data.getDataPath())

# Load plane contained in pybullet_data
planeId = p.loadURDF("plane.urdf")

# Set gravity for simulation
p.setGravity(0,0,-9.8)

dir_path = os.path.dirname(os.path.realpath(__file__))
for root, dirs, files in os.walk(os.path.dirname(dir_path)):
    for file in files:
        if file.endswith('.urdf'):
            print(root)
            p.setAdditionalSearchPath(root)

pandaId = p.loadURDF(os.path.join(robot_data.getDataPath(),"franka_description/robots/panda_arm_physics.urdf"))

# set constraint between base_link and world
cid = p.createConstraint(pandaId, -1, -1, -1, p.JOINT_FIXED, [0, 0, 0], [0, 0, 0],
                         [p.getBasePositionAndOrientation(pandaId)[0][0],
                          p.getBasePositionAndOrientation(pandaId)[0][1],
                          p.getBasePositionAndOrientation(pandaId)[0][2] * 1.2],
                         p.getBasePositionAndOrientation(pandaId)[1])


while True:
    p.stepSimulation()
    time.sleep(0.01)
Example No. 20
    def __init__(self,
                 urdfRoot=robot_data.getDataPath(),
                 useIK=0,
                 isDiscrete=0,
                 actionRepeat=1,
                 renders=False,
                 maxSteps=1000,
                 dist_delta=0.03,
                 numControlledJoints=7,
                 fixedPositionObj=True,
                 includeVelObs=True):

        self.action_dim = numControlledJoints
        self._isDiscrete = isDiscrete
        self._timeStep = 1. / 240.
        self._useIK = useIK
        self._urdfRoot = urdfRoot
        self._actionRepeat = actionRepeat
        self._observation = []
        self._envStepCounter = 0
        self._renders = renders
        self._maxSteps = maxSteps
        self.terminated = False
        self._cam_dist = 1.3
        self._cam_yaw = 180
        self._cam_pitch = -40
        self._h_table = []
        self._target_dist_max = 0.3
        self._target_dist_min = 0.2
        self._p = p
        self.fixedPositionObj = fixedPositionObj
        self.includeVelObs = includeVelObs

        if self._renders:
            cid = p.connect(p.SHARED_MEMORY)
            if (cid < 0):
                cid = p.connect(p.GUI)
            p.resetDebugVisualizerCamera(2.5, 90, -60, [0.52, -0.2, -0.33])
        else:
            p.connect(p.DIRECT)

        #self.seed()
        # initialize simulation environment
        self.reset()

        observationDim = len(self._observation)
        observation_high = np.array([largeValObservation] * observationDim)
        self.observation_space = spaces.Box(-observation_high,
                                            observation_high,
                                            dtype='float32')

        if (self._isDiscrete):
            self.action_space = spaces.Discrete(
                self._panda.getActionDimension())

        else:
            #self.action_dim = 2 #self._panda.getActionDimension()
            self._action_bound = 1
            action_high = np.array([self._action_bound] * self.action_dim)
            self.action_space = spaces.Box(-action_high,
                                           action_high,
                                           dtype='float32')

        self.viewer = None
Example No. 21
import os, inspect

currentdir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
print(currentdir)
parentdir = os.path.dirname(os.path.dirname(currentdir))
os.sys.path.insert(0, parentdir)

import pybullet as p
import pybullet_data
import robot_data
import time
import math as m

print('robot_data.getDataPath()', robot_data.getDataPath())


def main():

    p.connect(p.GUI)
    p.loadURDF(os.path.join(pybullet_data.getDataPath(), "plane.urdf"))

    # Set gravity for simulation
    p.setGravity(0, 0, -9.8)

    # load robot model
    cube_start_pos = [0, 0, 1]
    robot_id = p.loadURDF(os.path.join(robot_data.getDataPath(),
                                       "darwinop/darwinOP.urdf"),
                          cube_start_pos,
                          globalScaling=2)
Example No. 22
    def __init__(self,
                 urdfRoot=robot_data.getDataPath(),
                 useIK=0,
                 isDiscrete=0,
                 actionRepeat=1,
                 renders=False,
                 maxSteps=1000,
                 dist_delta=0.03,
                 action_space=7,
                 fixedPositionObj=True,
                 includeVelObs=True,
                 object_position=0,
                 test_phase=False,
                 alg='ddpg',
                 max_episode_steps=1000,
                 type_physics=0):

        self.object_position = object_position
        self.action_dim = action_space
        self._isDiscrete = isDiscrete
        # domain-randomized physics: jitter the nominal 240 Hz timestep with
        # exponentially distributed noise whose rate is itself re-sampled
        self._param_lambda = 1 / np.random.uniform(125, 1000)
        self._timeStep = (1.0 / 240.0) + np.random.exponential(
            self._param_lambda)
        self._useIK = useIK
        self._urdfRoot = urdfRoot
        self._actionRepeat = actionRepeat
        self._observation = []
        self._envStepCounter = 0
        self._renders = renders
        self._maxSteps = maxSteps
        self._cam_dist = 1.3
        self._cam_yaw = 180
        self._cam_pitch = -40
        self._h_table = []
        self._target_dist_max = 0.3
        self._target_dist_min = 0.1
        self._p = p
        self.fixedPositionObj = fixedPositionObj
        self.includeVelObs = includeVelObs
        self.test_phase = test_phase
        self.alg = alg
        self.max_episode_steps = max_episode_steps
        self.type_physics = type_physics

        if self._renders:
            cid = p.connect(p.SHARED_MEMORY)
            if (cid < 0):
                cid = p.connect(p.GUI)
            p.resetDebugVisualizerCamera(2.5, 90, -60, [0.52, -0.2, -0.33])
        else:
            p.connect(p.DIRECT)

        # self.seed()
        # initialize simulation environment
        self._observation = self.reset()

        observation_dim = len(self._observation['observation'])

        self.observation_space = spaces.Dict({
            'observation':
            spaces.Box(-largeValObservation,
                       largeValObservation,
                       shape=(observation_dim, ),
                       dtype=np.float32),

            # the achieved goal is the position reached with the object in space
            'achieved_goal':
            spaces.Box(-largeValObservation,
                       largeValObservation,
                       shape=(3, ),
                       dtype=np.float32),

            # the desired goal is the desired position in space
            'desired_goal':
            spaces.Box(-largeValObservation,
                       largeValObservation,
                       shape=(3, ),
                       dtype=np.float32)
        })

        print(self.observation_space)

        if (self._isDiscrete):
            self.action_space = spaces.Discrete(
                self._panda.getActionDimension())

        else:
            #self.action_dim = 2 #self._panda.getActionDimension()
            self._action_bound = 1
            action_high = np.array([self._action_bound] * self.action_dim)
            self.action_space = spaces.Box(-action_high,
                                           action_high,
                                           dtype=np.float32)

        self.viewer = None
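The jittered _timeStep above only affects the simulation once it is handed to PyBullet. A minimal sketch of re-drawing and applying it per episode; p.setTimeStep is real PyBullet API, but the method itself is illustrative, not this repo's actual reset logic:

    def _randomize_timestep(self):
        # re-sample the exponential jitter around the nominal 240 Hz step
        self._param_lambda = 1 / np.random.uniform(125, 1000)
        self._timeStep = (1.0 / 240.0) + np.random.exponential(self._param_lambda)
        p.setTimeStep(self._timeStep)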
Example No. 23
def main(argv):

    # -j
    numControlledJoints = 7
    # -p
    fixed = False
    # -o
    normalize_observations = False
    # -g
    gamma = 0.9
    # -b
    batch_size = 128
    # -m
    memory_limit = 1000000
    # -r
    normalize_returns = True
    # -t
    timesteps = 10000000

    policy_name = "pushing_policy"

    # COMMAND LINE PARAMS MANAGEMENT:
    try:
        opts, args = getopt.getopt(argv,"hj:p:g:b:m:r:o:t:n:",["j=","p=","g=","b=","m=","r=","o=","t=","n="])
    except getopt.GetoptError:
        print('train.py -t <timesteps> -j <numJoints> -p <fixedPoseObject> -n <policy_name> -g <gamma> -b <batchsize> -m <memory_limit> -r <norm_ret> -o <norm_obs>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('------------------ Default values:')
            print('train.py -t <timesteps: 10000000> -j <numJoints: 7> -p <fixedPoseObject: False> -n <policy_name:"pushing_policy"> -g <gamma: 0.9> -b <batch_size: 128> -m <memory_limit: 1000000> -r <norm_ret: True> -o <norm_obs: False>')
            print('------------------')
            return 0
        elif opt in ("-j", "--j"):
            if(numControlledJoints >7):
                print("check dim state")
                return 0
            else:
                numControlledJoints = int(arg)
        elif opt in ("-p", "--p"):
            fixed = bool(arg)
        elif opt in ("-g", "--g"):
            gamma = float(arg)
        elif opt in ("-o", "--o"):
            normalize_observations = bool(arg)
        elif opt in ("-b", "--b"):
            batch_size = int(arg)
        elif opt in ("-m", "--m"):
            memory_limit = int(arg)
        elif opt in ("-r", "--r"):
            normalize_returns = bool(arg)
        elif opt in ("-t", "--t"):
            timesteps = int(arg)
        elif opt in ("-n","--n"):
            policy_name = str(arg)


    discreteAction = 0
    rend = False
    pandaenv = pandaPushGymEnv(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
                               isDiscrete=discreteAction, numControlledJoints=numControlledJoints,
                               fixedPositionObj=fixed, includeVelObs=True)

    n_actions = pandaenv.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))


    pandaenv = DummyVecEnv([lambda: pandaenv])

    model = DDPG(LnMlpPolicy, pandaenv, normalize_observations=normalize_observations, gamma=gamma,
                 batch_size=batch_size, memory_limit=memory_limit, normalize_returns=normalize_returns,
                 verbose=1, param_noise=param_noise, action_noise=action_noise,
                 tensorboard_log="../pybullet_logs/pandareach_ddpg/", reward_scale=1)

    print(colored("-----Timesteps:","red"))
    print(colored(timesteps,"red"))
    print(colored("-----Number Joints Controlled:","red"))
    print(colored(numControlledJoints,"red"))
    print(colored("-----Object Position Fixed:","red"))
    print(colored(fixed,"red"))
    print(colored("-----Policy Name:","red"))
    print(colored(policy_name,"red"))
    print(colored("------","red"))
    print(colored("Launch the script with -h for further info","red"))

    model.learn(total_timesteps=timesteps)
    print("Saving model to panda.pkl")
    model.save("../pybullet_logs/pandareach_ddpg/policies"+ policy_name)

    del model # remove to demonstrate saving and loading
Example No. 24
normalize_observations = False
# -g
gamma = 0.9
# -b
batch_size = 16
# -m
memory_limit = 1000000
# -r
normalize_returns = True
# -t
timesteps = 10000000
policy_name = "pushing_policy"
discreteAction = 0
rend = True

env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(),
                             renders=rend,
                             useIK=0,
                             isDiscrete=discreteAction,
                             numControlledJoints=numControlledJoints,
                             fixedPositionObj=fixed,
                             includeVelObs=True)

# Wrap the model
model = HER.load("../policies/pushing_fixed_HER_Dyn_Rand0.pkl", env=env)

obs = env.reset()

for i in range(10000):
    action, _ = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    if done:
        obs = env.reset()
Example No. 25
normalize_observations = False
# -g
gamma = 0.9
# -b
batch_size = 16
# -m
memory_limit = 1000000
# -r
normalize_returns = True
# -t
timesteps = 1000000
policy_name = "pushing_policy"
discreteAction = 0
rend = True

env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
                             isDiscrete=discreteAction, action_space=action_space,
                             fixedPositionObj=fixed, includeVelObs=True, object_position=0,
                             test_phase=True, alg='td3_normal_policy_to_different_physics',
                             type_physics=2, max_episode_steps=500)

goal_selection_strategy = 'future' # equivalent to GoalSelectionStrategy.FUTURE
# Wrap the model
model = HER.load("../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl", env=env)

obs = env.reset()

for _ in range(10000):
    action, _ = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    if done:
        obs = env.reset()
Example No. 26
def main(argv):

    numControlledJoints = 6
    fixed = False
    normalize_observations = False
    gamma = 0.9
    batch_size = 16
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 1000000
    policy_name = "reaching_policy"
    # COMMAND LINE PARAMS MANAGEMENT:

    discreteAction = 0
    # rend = True
    rend = False

    kukaenv = kukaReachGymEnv(urdfRoot=robot_data.getDataPath(),
                              renders=rend,
                              useIK=0,
                              isDiscrete=discreteAction,
                              numControlledJoints=numControlledJoints,
                              fixedPositionObj=fixed,
                              includeVelObs=True)
    n_actions = kukaenv.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) *
                                                np.ones(n_actions))
    kukaenv = DummyVecEnv([lambda: kukaenv])
    full_log = True
    model = DDPG(
        LnMlpPolicy,
        kukaenv,
        normalize_observations=normalize_observations,
        gamma=gamma,
        batch_size=batch_size,
        memory_limit=memory_limit,
        normalize_returns=normalize_returns,
        verbose=1,
        param_noise=param_noise,
        action_noise=action_noise,
        tensorboard_log=
        "../pybullet_logs/kuka_reach_ddpg/reaching_obstacle_DDPG_PHASE_1",
        full_tensorboard_log=full_log,
        reward_scale=1)

    print(colored("-----Timesteps:", "red"))
    print(colored(timesteps, "red"))
    print(colored("-----Number Joints Controlled:", "red"))
    print(colored(numControlledJoints, "red"))
    print(colored("-----Object Position Fixed:", "red"))
    print(colored(fixed, "red"))
    print(colored("-----Policy Name:", "red"))
    print(colored(policy_name, "red"))
    print(colored("------", "red"))
    print(colored("Launch the script with -h for further info", "red"))

    model.learn(total_timesteps=timesteps)

    print("Saving model to kuka.pkl")
    model.save("../pybullet_logs/kukareach_obstacle_ddpg/" + policy_name)

    del model  # remove to demonstrate saving and loading