# Example no. 1
# 0
def main():
    """Train a DDPG+HER pushing policy from scratch and save it to disk.

    Builds a non-rendering ``pandaPushGymEnvHERRand`` environment with a
    7-dimensional continuous action space and a fixed object position,
    wraps ``DDPG`` in ``HER`` using the 'future' goal-selection strategy and
    Ornstein-Uhlenbeck action noise (sigma 0.5 per action dimension), trains
    for 1,000,000 timesteps and writes the result to
    ``../policies/pushing_fixed_HER_Dyn_Rand``.

    NOTE(review): earlier revisions of this function also declared
    ``normalize_observations = False``, ``gamma = 0.9``,
    ``memory_limit = 1000000``, ``normalize_returns = True`` and
    ``policy_name = "pushing_policy"`` but never passed them to the model,
    so they had no effect. They were removed as dead locals; if any of them
    was meant to apply, forward it to ``HER`` explicitly.
    """
    model_class = DDPG  # off-policy algorithm wrapped by HER

    # Environment configuration.
    action_space = 7
    fixed = True
    discreteAction = 0
    rend = False

    # Training budget.
    timesteps = 1000000

    env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(),
                                 renders=rend,
                                 useIK=0,
                                 isDiscrete=discreteAction,
                                 action_space=action_space,
                                 fixedPositionObj=fixed,
                                 includeVelObs=True)

    # Available strategies (cf paper): future, final, episode, random
    goal_selection_strategy = 'future'  # equivalent to GoalSelectionStrategy.FUTURE

    # One noise process per action dimension, zero mean, sigma 0.5.
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=0.5 * np.ones(n_actions))

    # Wrap the model
    model = HER(
        CustomPolicy,
        env,
        model_class,
        n_sampled_goal=4,
        goal_selection_strategy=goal_selection_strategy,
        verbose=1,
        tensorboard_log=
        "../pybullet_logs/panda_push_ddpg/stable_baselines/DDPG+HER_FIXED_DYN_RAND",
        buffer_size=1000000,
        batch_size=256,
        random_exploration=0.3,
        action_noise=action_noise)

    # Train the freshly constructed model (no previous policy is loaded here).
    model.learn(timesteps)
    print("Saving Policy")
    model.save("../policies/pushing_fixed_HER_Dyn_Rand")
def main(load_policy=False):
    """Train a TD3+HER pushing policy, optionally resuming from a saved one.

    Builds a non-rendering ``pandaPushGymEnvHERRand`` environment wrapped in
    ``Monitor`` (logging to the module-level ``log_dir``), then trains for
    1,500,000 timesteps with the module-level ``callback`` and saves the
    result to ``../policies/PUSHING_FIXED_TD3_DYN_RAND``.

    Args:
        load_policy: When true, the module-level ``log_dir_policy`` is
            redirected and training continues from the saved policy
            ``PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl`` instead of a
            freshly constructed model.

    NOTE(review): earlier revisions also declared
    ``normalize_observations = False``, ``gamma = 0.9``,
    ``memory_limit = 1000000`` and ``normalize_returns = True`` but never
    passed them to the model, so they had no effect. They were removed as
    dead locals; forward them to ``HER`` explicitly if they were intended.
    """
    global log_dir, log_dir_policy
    if load_policy:
        log_dir_policy = '../policies/PUSHING_TD3+HER_FIXED_POSITION_DYN_RAND_FROM_FIXED_PHYSICS'

    model_class = TD3  # off-policy algorithm wrapped by HER

    # Environment configuration.
    action_space = 7
    fixed = True
    discreteAction = 0
    rend = False

    # Training budget.
    timesteps = 1500000

    env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(),
                                 renders=rend,
                                 useIK=0,
                                 isDiscrete=discreteAction,
                                 action_space=action_space,
                                 fixedPositionObj=fixed,
                                 includeVelObs=True)
    env = Monitor(env, log_dir, allow_early_resets=True)

    # Available strategies (cf paper): future, final, episode, random
    goal_selection_strategy = 'future'  # equivalent to GoalSelectionStrategy.FUTURE

    # One noise process per action dimension, zero mean, sigma 0.5.
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=0.5 * np.ones(n_actions))

    # Settings shared by the fresh-model and the loaded-model paths.
    her_kwargs = dict(n_sampled_goal=4,
                      goal_selection_strategy=goal_selection_strategy,
                      buffer_size=1000000,
                      batch_size=256,
                      random_exploration=0.3,
                      action_noise=action_noise)

    # Build exactly one model: previously a fresh model was always
    # constructed and then thrown away whenever load_policy was set.
    if load_policy:
        model = HER.load(
            "../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl",
            env=env,
            tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3+HER_FIXED_DYN_RAND_FROM_FIXED_PHYSICS",
            **her_kwargs)
    else:
        model = HER(
            CustomPolicy,
            env,
            model_class,
            verbose=1,
            tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3+HER_FIXED_DYN_RAND",
            **her_kwargs)

    # Train (continuing from the loaded policy when load_policy is set).
    model.learn(timesteps, callback=callback)
    model.save("../policies/PUSHING_FIXED_TD3_DYN_RAND")
    print("Finished train1")
# Example no. 3
# 0
# Evaluation script fragment: loads a saved HER policy and rolls it out in a
# rendering pandaPushGymEnvHERRand environment for 10000 steps.
# NOTE(review): this fragment relies on `fixed` and `numControlledJoints`,
# which are not defined in the visible portion of the script.
# batch_size, memory_limit, normalize_returns, timesteps and policy_name are
# assigned below but not referenced anywhere in the visible portion.
# -b
batch_size = 16
# -m
memory_limit = 1000000
# -r
normalize_returns = True
# -t
timesteps = 10000000
policy_name = "pushing_policy"
discreteAction = 0
# Rendering is switched on for this rollout.
rend = True

env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(),
                             renders=rend,
                             useIK=0,
                             isDiscrete=discreteAction,
                             numControlledJoints=numControlledJoints,
                             fixedPositionObj=fixed,
                             includeVelObs=True)

# Load the previously saved policy and attach it to the environment.
model = HER.load("../policies/pushing_fixed_HER_Dyn_Rand0.pkl", env=env)

obs = env.reset()

# Roll the policy out, resetting the environment whenever an episode ends.
for i in range(10000):
    action, _ = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    # NOTE(review): `i == 500` is true only once (at global step 500), so it
    # forces a single extra reset rather than one every 500 steps -- confirm
    # whether a per-episode step cap was intended.
    if done or i == 500:
        obs = env.reset()
# Evaluation script fragment: loads a saved TD3+HER policy and rolls it out in
# a rendering pandaPushGymEnvHERRand environment configured for the test phase.
# NOTE(review): this fragment relies on `action_space` and `fixed`, which are
# not defined in the visible portion of the script.
# normalize_observations, gamma, batch_size, memory_limit, normalize_returns,
# timesteps, policy_name and goal_selection_strategy are assigned below but not
# referenced anywhere in the visible portion.
normalize_observations = False
# -g
gamma = 0.9
# -b
batch_size = 16
# -m
memory_limit = 1000000
# -r
normalize_returns = True
# -t
timesteps = 1000000
policy_name = "pushing_policy"
discreteAction = 0
# Rendering is switched on for this rollout.
rend = True

# The environment is given max_episode_steps=500 and test_phase=True; how it
# uses them is defined by pandaPushGymEnvHERRand (not visible here).
env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(), renders=rend, useIK=0,
        isDiscrete=discreteAction, action_space = action_space,
        fixedPositionObj = fixed, includeVelObs = True, object_position=0, test_phase = True, alg = 'td3_normal_policy_to_different_physics', type_physics=2, max_episode_steps=500)

goal_selection_strategy = 'future' # equivalent to GoalSelectionStrategy.FUTURE
# Load the previously saved policy and attach it to the environment.
model = HER.load("../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl", env=env)

obs = env.reset()

# Roll the policy out for 10000 steps, resetting whenever an episode ends.
# The loop variable `_` is overwritten by the tuple unpacking below; this does
# not affect iteration because range() drives the loop.
for _ in range(10000):
    action, _ = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    if done:
        obs = env.reset()