def main():

    # create Environment
    env = iCubPushGymEnv(urdfRoot=robot_data.getDataPath(), renders=False, useIK=1,
                        isDiscrete=0, rnd_obj_pose=0, maxSteps=2000, reward_type=0)

    # set seed
    seed = 1
    tf.reset_default_graph()
    set_global_seed(seed)
    env.seed(seed)

    # set log
    monitor_dir = os.path.join(log_dir, 'log')
    os.makedirs(monitor_dir, exist_ok=True)
    env = Monitor(env, monitor_dir + '/', allow_early_resets=True)

    # create agent model
    nb_actions = env.action_space.shape[-1]
    action_noise = NormalActionNoise(mean=np.zeros(nb_actions), sigma=float(0.5373) * np.ones(nb_actions))

    model = DDPG('LnMlpPolicy', env, action_noise=action_noise, gamma=0.99, batch_size=16,
                 normalize_observations=True, normalize_returns=False, memory_limit=100000,
                 verbose=1, tensorboard_log=os.path.join(log_dir, 'tb'), full_tensorboard_log=False)

    # start learning
    model.learn(total_timesteps=500000, seed=seed, callback=callback)

    # save model
    print("Saving model.pkl to ",log_dir)
    act.save(log_dir+"/final_model.pkl")
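# NOTE (added sketch): the snippet above relies on module-level `log_dir` and
# `callback` definitions that are not part of the example. A minimal,
# hypothetical version of both, following the Stable-Baselines v2 callback
# convention (return False to stop training early):
import os

log_dir = "../pybullet_logs/icub_push_ddpg"  # hypothetical log directory
os.makedirs(log_dir, exist_ok=True)

n_calls = 0

def callback(locals_, globals_):
    global n_calls
    n_calls += 1
    if n_calls % 10000 == 0:
        # inside learn(), `self` is the model being trained
        locals_["self"].save(os.path.join(log_dir, "checkpoint.pkl"))
    return True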
Example 2
def main():
    use_IK = 0
    discreteAction = 0
    use_IK = 1 if discreteAction else use_IK

    env = kukaPushGymEnv(urdfRoot=robot_data.getDataPath(), renders=True, useIK=use_IK, isDiscrete=discreteAction)
    motorsIds = []

    dv = 1
    for i in range(6):
        info = env._p.getJointInfo(env._kuka.kukaId, i)
        jointName = info[1]
        motorsIds.append(env._p.addUserDebugParameter(jointName.decode("utf-8"), -dv, dv, 0.0))

    done = False
    env._p.addUserDebugText('current hand position', [0, -0.5, 1.4], [1, 0, 0])
    idx = env._p.addUserDebugText(' ', [0, -0.5, 1.2], [1, 0, 0])

    for t in range(int(1e7)):
        #env.render()
        action = []
        # read the slider values via the IDs returned by addUserDebugParameter
        for motorId in motorsIds:
            action.append(env._p.readUserDebugParameter(motorId))

        action = int(action[0]) if discreteAction else action

        # action = env.action_space.sample()

        #print(env.step(action))

        state, reward, done, _ = env.step(action)
        if t % 100 == 0:
            print("reward ", reward)
            print("done ", done)
            env._p.addUserDebugText(' '.join(str(round(e, 2)) for e in state[:6]), [0, -0.5, 1.2], [1, 0, 0], replaceItemUniqueId=idx)
Example 3
    def __init__(self,
                 urdfRootPath=robot_data.getDataPath(),
                 timeStep=0.01,
                 useInverseKinematics=0,
                 basePosition=[-0.6, -0.4, 0.625],
                 useFixedBase=True,
                 action_space=6,
                 includeVelObs=True):
        self.urdfRootPath = os.path.join(
            urdfRootPath, "kuka_kr6_support/urdf/kr6r700sixx.urdf")
        # self.urdfRootPath = os.path.join(urdfRootPath, "kuka_kr6_support/urdf/kuka_model.urdf")

        self.timeStep = timeStep
        self.useInverseKinematics = useInverseKinematics
        self.useNullSpace = 0
        self.useOrientation = 1
        self.useSimulation = 1
        self.basePosition = basePosition
        self.useFixedBase = useFixedBase
        self.workspace_lim = [[0.3, 0.60], [-0.3, 0.3], [0, 1]]
        self.workspace_lim_endEff = [[0.1, 0.70], [-0.4, 0.4], [0.65, 1]]
        self.endEffLink = 7
        self.action_space = action_space
        self.includeVelObs = includeVelObs
        self.numJoints = 6
        self.reset()
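# NOTE (added sketch): only the constructor is shown above; assuming the
# enclosing class is named, e.g., kukaKR6 (name hypothetical), it would be
# instantiated like this:
# kuka = kukaKR6(urdfRootPath=robot_data.getDataPath(), timeStep=0.01,
#                useInverseKinematics=1, includeVelObs=True)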
Example 4
def main(argv):
    numControlledJoints = 6
    fixed = False
    normalize_observations = False
    gamma = 0.99
    batch_size = 64
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 1000000
    policy_name = "pushing_policy"
    discreteAction = 0
    rend = False

    kukaenv = kukaPushGymEnvHer(urdfRoot=robot_data.getDataPath(),
                                renders=rend,
                                useIK=0,
                                isDiscrete=discreteAction,
                                numControlledJoints=numControlledJoints,
                                fixedPositionObj=fixed,
                                includeVelObs=True)
    kukaenv = Monitor(kukaenv, log_dir, allow_early_resets=True)

    n_actions = kukaenv.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))
    model_class = DDPG
    goal_selection_strategy = 'future'
    model = HER(CustomPolicy,
                kukaenv,
                model_class,
                n_sampled_goal=4,
                goal_selection_strategy=goal_selection_strategy,
                verbose=1,
                tensorboard_log="../pybullet_logs/kuka_push_ddpg/pushing_DDPG_HER_PHASE_1",
                buffer_size=1000000,
                batch_size=64,
                random_exploration=0.3,
                action_noise=action_noise)

    print(colored("-----Timesteps:", "red"))
    print(colored(timesteps, "red"))
    print(colored("-----Number Joints Controlled:", "red"))
    print(colored(numControlledJoints, "red"))
    print(colored("-----Object Position Fixed:", "red"))
    print(colored(fixed, "red"))
    print(colored("-----Policy Name:", "red"))
    print(colored(policy_name, "red"))
    print(colored("------", "red"))
    print(colored("Launch the script with -h for further info", "red"))

    model.learn(total_timesteps=timesteps, log_interval=100, callback=callback)

    print("Saving model to kuka.pkl")
    model.save("../pybullet_logs/kukapush_ddpg_her/" + policy_name)

    del model  # remove to demonstrate saving and loading
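    # NOTE (added sketch): the `del model` above hints at save/load; a
    # hypothetical continuation restoring the policy via the Stable-Baselines
    # HER wrapper:
    model = HER.load("../pybullet_logs/kukapush_ddpg_her/" + policy_name, env=kukaenv)
    obs = kukaenv.reset()
    action, _states = model.predict(obs)  # query the restored policy once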
Example 5
    def __init__(self, urdf_root_path=robot_data.getDataPath(),
                 time_step=0.01,
                 global_scale=2):
        self.urdf_root_path = os.path.join(urdf_root_path, 'darwinop/darwinOP.urdf')
        self.time_step = time_step
        self.global_scale = global_scale
        self.darwin_id = None
        self.start_position = [0, 0, 1]
        self.num_joints = 20
        self.reset()
Example 6
def main():

    # create Environment
    env = iCubPushGymEnv(urdfRoot=robot_data.getDataPath(), renders=True, useIK=1,
                        isDiscrete=0, rnd_obj_pose=0, maxSteps=2000, reward_type=0)


    model = DDPG.load(os.path.join(log_dir,'final_model.pkl'), env=env)

    #model.learn(total_timesteps=10000)
    # Evaluate the trained agent
    mean_reward = evaluate(env, model, num_steps=6000)
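# NOTE (added sketch): `evaluate` is not defined in the snippet above; a
# plausible implementation, mirroring the evaluation loop from the
# Stable-Baselines docs:
import numpy as np

def evaluate(env, model, num_steps=1000):
    """Run the trained policy for num_steps and return the mean episode reward."""
    episode_rewards = [0.0]
    obs = env.reset()
    for _ in range(num_steps):
        action, _states = model.predict(obs)
        obs, reward, done, _ = env.step(action)
        episode_rewards[-1] += reward
        if done:
            obs = env.reset()
            episode_rewards.append(0.0)
    mean_reward = round(float(np.mean(episode_rewards)), 2)
    print("Mean reward:", mean_reward, "Num episodes:", len(episode_rewards))
    return mean_reward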
Example 7
def main(args):

    use_IK = 1 if args.useIK else 0

    env = iCubReachGymEnv(urdfRoot=robot_data.getDataPath(),
                          renders=True,
                          control_arm=args.arm,
                          useIK=use_IK,
                          isDiscrete=0,
                          useOrientation=1,
                          rnd_obj_pose=1)
    motorsIds = []

    if env._isDiscrete:
        dv = 12
        motorsIds.append(env._p.addUserDebugParameter("lhPosX", -dv, dv, 0))
    elif use_IK:
        dv = 1
        motorsIds.append(env._p.addUserDebugParameter("lhPosX", -dv, dv, 0.0))
        motorsIds.append(env._p.addUserDebugParameter("lhPosY", -dv, dv, 0.0))
        motorsIds.append(env._p.addUserDebugParameter("lhPosZ", -dv, dv, 0.0))
        motorsIds.append(env._p.addUserDebugParameter("lhRollx", -dv, dv, 0.0))
        motorsIds.append(env._p.addUserDebugParameter("lhPitchy", -dv, dv,
                                                      0.0))
        motorsIds.append(env._p.addUserDebugParameter("lhYawz", -dv, dv, 0.0))

    else:
        dv = 1
        joints_idx = env._icub.motorIndices

        for j in joints_idx:
            info = env._p.getJointInfo(env._icub.icubId, j)
            jointName = info[1]
            motorsIds.append(
                env._p.addUserDebugParameter(jointName.decode("utf-8"), -dv,
                                             dv, 0.0))

    done = False
    #env._p.addUserDebugText('current hand position',[0,-0.5,1.4],[1.1,0,0])
    #idx = env._p.addUserDebugText(' ',[0,-0.5,1.2],[1,0,0])

    for t in range(10000000):
        #env.render()
        action = []
        for motorId in motorsIds:
            action.append(env._p.readUserDebugParameter(motorId))

        action = int(action[0]) if env._isDiscrete else action

        state, reward, done, _ = env.step(action)
        if t % 100 == 0:
            print("reward ", reward)
Example 8
def main(args):
    use_IK = 0
    discreteAction = 0
    use_IK = 1 if discreteAction else use_IK

    env = kukaReachGymEnvOb(urdfRoot=robot_data.getDataPath(),
                            maxSteps=100000,
                            actionRepeat=20,
                            renders=True,
                            useIK=use_IK,
                            isDiscrete=discreteAction,
                            obstacles_num=args.obs_num)
    motorsIds = []

    dv = 1
    numJoint = env._p.getNumJoints(env._kuka.kukaId)
    for i in range(numJoint):
        info = env._p.getJointInfo(env._kuka.kukaId, i)
        jointName = info[1]
        print(info)
        motorsIds.append(
            env._p.addUserDebugParameter(jointName.decode("utf-8"), -dv, dv,
                                         0.0))

    done = False
    env._p.addUserDebugText('current hand position', [0, -0.5, 1.4],
                            [1, 0, 0])
    idx = env._p.addUserDebugText(' ', [0, -0.5, 1.2], [1, 0, 0])

    for t in range(int(1e7)):
        # env.render()
        action = []
        for motorId in range(6):
            action.append(env._p.readUserDebugParameter(motorId))

        # action = int(action[0]) if discreteAction else action
        # print(action)
        # print(env.step(action))
        # note: the slider values read above are discarded in favor of a
        # random sample from the action space
        action = env.action_space.sample()
        # print(action)
        # action[-3] = action[-2]

        state, reward, done, _ = env.step(action)
        time.sleep(0.1)
        if t % 100 == 0:
            print("reward ", reward)
            print("done ", done)
            env._p.addUserDebugText(' '.join(
                str(round(e, 2)) for e in state['observation'][:6]),
                                    [0, -0.5, 1.2], [1, 0, 0],
                                    replaceItemUniqueId=idx)
Example 9
    def __init__(self,
                 urdfRoot=robot_data.getDataPath(),
                 useIK=0,
                 isDiscrete=0,
                 actionRepeat=1,
                 renders=False,
                 maxSteps=1000,
                 dist_delta=0.03,
                 numControlledJoints=6,
                 fixedPositionObj=False,
                 includeVelObs=True):

        self.action_dim = numControlledJoints
        self._isDiscrete = isDiscrete
        self._timeStep = 1. / 240.
        self._useIK = useIK
        self._urdfRoot = urdfRoot
        self._actionRepeat = actionRepeat
        self._observation = []
        self._envStepCounter = 0
        self._renders = renders
        self._maxSteps = maxSteps
        self.terminated = False
        self._cam_dist = 1.3
        self._cam_yaw = 180
        self._cam_pitch = -40
        self._h_table = []
        self._target_dist_max = 0.3
        self._target_dist_min = 0.1
        self._p = p
        self.fixedPositionObj = fixedPositionObj
        self.includeVelObs = includeVelObs
        self.ws_lim = [[0.3, 0.5], [-0.2, 0.2], [0, 1]]

        if self._renders:
            # try to attach to an existing physics server first, otherwise
            # spawn a GUI instance
            cid = p.connect(p.SHARED_MEMORY)
            if cid < 0:
                cid = p.connect(p.GUI)
            p.resetDebugVisualizerCamera(2.5, 90, -60, [0.52, -0.2, -0.33])
        else:
            # headless physics server
            p.connect(p.DIRECT)
Example 10
def main():

    use_IK = 0
    discreteAction = 0
    use_IK = 1 if discreteAction else use_IK

    env = pandaReachGymEnv(urdfRoot=robot_data.getDataPath(), renders=True, useIK=use_IK, isDiscrete=discreteAction)
    motorsIds = []

    if env._isDiscrete:
        dv = 12
        motorsIds.append(env._p.addUserDebugParameter("lhPosX", -dv, dv, 0))
    else:
        dv = 1
        #joints_idx = env._icub.motorIndices

        for j in range(7):
            info = env._p.getJointInfo(env._panda.pandaId, j)
            jointName = info[1]
            motorsIds.append(env._p.addUserDebugParameter(jointName.decode("utf-8"), -dv, dv, 0.0))

    done = False
    env._p.addUserDebugText('current hand position', [0, -0.5, 1.4], [1, 0, 0])
    idx = env._p.addUserDebugText(' ', [0, -0.5, 1.2], [1, 0, 0])

    for t in range(10000000):
        #env.render()
        action = []
        # read the slider values via the IDs returned by addUserDebugParameter
        for motorId in motorsIds:
            action.append(env._p.readUserDebugParameter(motorId))

        action = int(action[0]) if discreteAction else action

        #print(env.step(action))

        state, reward, done, _ = env.step(action)
        if t % 10 == 0:
            print("reward ", reward)
            print("done ", done)
            env._p.addUserDebugText(' '.join(str(round(e, 2)) for e in state[:6]), [0, -0.5, 1.2], [1, 0, 0], replaceItemUniqueId=idx)