Example #1
# (imports for this test snippet; N_BITS and model_predict are helpers defined
#  elsewhere in the test module this example comes from)
import os
import pytest
from stable_baselines import HER, DDPG, SAC
from stable_baselines.common.bit_flipping_env import BitFlippingEnv
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines.her import HERGoalEnvWrapper
def test_model_manipulation(model_class, goal_selection_strategy):
    env = BitFlippingEnv(N_BITS,
                         continuous=model_class in [DDPG, SAC],
                         max_steps=N_BITS)
    env = DummyVecEnv([lambda: env])

    model = HER('MlpPolicy',
                env,
                model_class,
                n_sampled_goal=3,
                goal_selection_strategy=goal_selection_strategy,
                verbose=0)
    model.learn(1000)

    model_predict(model, env, n_steps=100, additional_check=None)

    model.save('./test_her')
    del model

    # NOTE: HER does not support VecEnvWrapper yet
    with pytest.raises(AssertionError):
        model = HER.load('./test_her', env=VecNormalize(env))

    model = HER.load('./test_her')

    # Check that the model raises an error when the env
    # is not wrapped (or no env passed to the model)
    with pytest.raises(ValueError):
        model.predict(env.reset())

    env_ = BitFlippingEnv(N_BITS,
                          continuous=model_class in [DDPG, SAC],
                          max_steps=N_BITS)
    env_ = HERGoalEnvWrapper(env_)

    model_predict(model, env_, n_steps=100, additional_check=None)

    model.set_env(env)
    model.learn(1000)

    model_predict(model, env_, n_steps=100, additional_check=None)

    assert model.n_sampled_goal == 3

    del model

    env = BitFlippingEnv(N_BITS,
                         continuous=model_class in [DDPG, SAC],
                         max_steps=N_BITS)
    model = HER.load('./test_her', env=env)
    model.learn(1000)

    model_predict(model, env_, n_steps=100, additional_check=None)

    assert model.n_sampled_goal == 3

    if os.path.isfile('./test_her.pkl'):
        os.remove('./test_her.pkl')
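The test above exercises the full HER workflow: train, save, reload, and predict through a goal-env wrapper. A minimal standalone sketch of that workflow, using the same BitFlippingEnv and assuming a SAC backend and the file name shown below, could look like this:

from stable_baselines import HER, SAC
from stable_baselines.common.bit_flipping_env import BitFlippingEnv

env = BitFlippingEnv(10, continuous=True, max_steps=10)

model = HER('MlpPolicy', env, SAC, n_sampled_goal=4, verbose=0)
model.learn(1000)
model.save('./her_bit_flipping')

# predict() expects the goal-dict observation to be flattened, so pass env= when
# loading (or wrap the environment with HERGoalEnvWrapper, as the test does)
model = HER.load('./her_bit_flipping', env=env)

obs = env.reset()
for _ in range(10):
    action, _ = model.predict(obs)
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()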
Example #2
# (imports for this snippet; Agent, ISettableGoalEnv, Observation and Dirs are
#  project-specific helpers defined in the repository this example comes from)
import os
from typing import Sequence
import numpy as np
from stable_baselines import HER, SAC
from stable_baselines.common.callbacks import BaseCallback, CallbackList, CheckpointCallback
class HERSACAgent(Agent):
    name = "her-sac"

    def __init__(self,
                 env: ISettableGoalEnv,
                 verbose=1,
                 rank=0,
                 experiment_name="her-sac"):
        self._env = env
        self._dirs = Dirs(
            experiment_name=f"{type(env).__name__}-{experiment_name}",
            rank=rank)
        options = {
            "env": env,
            "tensorboard_log": self._dirs.tensorboard,
            "model_class": SAC,
            "gamma": 1,
            "learning_rate": 3e-3
        }
        if os.path.isdir(self._dirs.models) and os.path.isfile(
                self._dirs.best_model):
            self._model = HER.load(load_path=self._dirs.best_model, **options)
            print(f"Loaded model {self._dirs.best_model}")
        else:
            self._model = HER(policy="MlpPolicy", verbose=verbose, **options)

    def __call__(self, obs: Observation) -> np.ndarray:
        action, _ = self._model.predict(obs, deterministic=True)
        return action

    def train(self,
              timesteps: int,
              callbacks: Sequence[BaseCallback] = None,
              num_checkpoints=4) -> None:
        callbacks = [] if callbacks is None else callbacks
        cb = CheckpointCallback(save_freq=timesteps // num_checkpoints,
                                save_path=self._dirs.models,
                                name_prefix=self._dirs.prefix)
        self._model.learn(total_timesteps=timesteps,
                          callback=CallbackList([cb, *callbacks]))
Example #3
# (the opening lines of this example are truncated in the source; the keyword
#  arguments below are the tail of a HER(...) constructor call, and the
#  reconstructed first line here is an assumption)
model = HER('MlpPolicy', env, SAC,
            ent_coef='auto',
            random_exploration=0.3,
            learning_starts=1000,
            train_freq=1,
            policy_kwargs=dict(layers=[256, 256, 256]),
            tensorboard_log="./OpenAI/")
# Train the model
model.learn(int(8e6))

model.save("./model2")

# WARNING: you must pass an env
# or wrap your environment with HERGoalEnvWrapper to use the predict method
model = HER.load('./model2', env=env)

obs = env.reset()
episodes = 0
successes = 0
step = 0
while episodes < 50:
    step += 1
    action, _ = model.predict(obs)
    obs, reward, done, info = env.step(action)
    env.render()
    if done or step > 1000:
        obs = env.reset()
        step = 0  # reset the per-episode step counter
        episodes += 1
        if info['is_success']:
            successes += 1

print('success_rate = ' + str(successes / episodes))
Example #4
# (REALRobotEnv and GoalWrapper are project-specific environment classes from the
#  repository this example comes from; only the stable-baselines imports are shown)
from stable_baselines import HER, SAC
print('setting up model')
model = HER('MlpPolicy',
            env,
            SAC,
            n_sampled_goal=4,
            goal_selection_strategy='future',
            verbose=1,
            buffer_size=int(1e6),
            learning_rate=1e-3,
            gamma=0.95,
            batch_size=256)
print('start learning')
model.learn(total_timesteps=256)
print('learning done')

# Here we need to restart the environment to make rendering possible
# (it doesn't work with the wrappers right now)
env = REALRobotEnv(objects=1)
env = GoalWrapper(env, crop_obs=True)
env.render("human")

print('display model')
observation = env.reset()
action = env.action_space.sample()
reward, done = 0, False
for t in range(100):
    model_action, _ = model.predict(observation)

    observation, reward, done, info = env.step(model_action)
    #print(model_action)
Example #5
import gym
import time

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2, DQN, HER, DDPG

import synergyenvs

env = gym.make("GraspBoxPybullet-v0")
env.render()
o = env.reset()

# model = PPO2(MlpPolicy, env, verbose=1)
model = HER('MlpPolicy', env, DDPG, n_sampled_goal=4, verbose=0)
# load() is a classmethod and returns a new model; assign its result
# (calling model.load(...) on the instance above would discard the loaded weights)
model = HER.load("./her_graspbox-1", env=env)

env.camera_adjust()

for _ in range(10000):
    env.render()
    action, _states = model.predict(o)
    # action = env.action_space.sample()
    o, r, done, info = env.step(action)
    print(o, r, done, info)
    if done:
        o = env.reset()
    time.sleep(0.1)

env.close()
Example #6
import gym
from stable_baselines import DDPG, HER, TD3, SAC

env = gym.make('FetchReach-v1')

#model = DDPG('MlpPolicy', env)
model = HER('MlpPolicy',
            env,
            DDPG,
            goal_selection_strategy='final',
            n_sampled_goal=4)
model.learn(50000000)
model.save('./her_fetch_reach')

#model = HER.load('./her_fetch_reach', env=env)

for _ in range(100):
    obs = env.reset()
    state = None
    done = False
    _reward = 0

    while not done:
        env.render()
        action, state = model.predict(obs)
        obs, reward, done, info = env.step(action)
        _reward += reward

    print("Reward = {}".format(_reward))

env.close()
Example #7
# (the imports and environment/model setup for this example are not included in the source)
### Run the learning process
model.learn(total_timesteps=400000, log_interval=1, reset_num_timesteps=False)

# Save the model if desired
# model.save("acrobot_her_baseline.pkl")

### Enjoy a trained agent

# duration of the simulations in seconds
t_end = 20

# Desired goal
desired_goal = 0.95 * env.env._tipPosZMax  # As difficult as possible

# Run the simulation in real-time
env.reset()
env.env.goal[0] = desired_goal
obs = env.env._get_obs()
episode_reward = 0
for _ in range(int(t_end / env.dt)):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    env.render()
    time.sleep(env.dt)

    episode_reward += reward
    if done or info.get('is_success', False):
        print("Reward:", episode_reward, "Success:",
              info.get('is_success', False))
        break
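Several of the examples above select goals with a string such as 'future' or 'final'; stable-baselines also exposes the same choices as the GoalSelectionStrategy enum, and the two forms are interchangeable. A short sketch on BitFlippingEnv, assuming a SAC backend:

from stable_baselines import HER, SAC
from stable_baselines.common.bit_flipping_env import BitFlippingEnv
from stable_baselines.her import GoalSelectionStrategy

env = BitFlippingEnv(10, continuous=True, max_steps=10)

# GoalSelectionStrategy.FUTURE is equivalent to goal_selection_strategy='future'
model = HER('MlpPolicy', env, SAC,
            n_sampled_goal=4,
            goal_selection_strategy=GoalSelectionStrategy.FUTURE,
            verbose=0)
model.learn(1000)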