import gym
from utils import wrappers
from gym.wrappers.time_limit import TimeLimit
from algos.test_policy import load_policy_and_env, run_policy

_, get_action = load_policy_and_env('./data/2')
env = TimeLimit(
    gym.make("PepperReachCam-v0", gui=True, dense=True),
    max_episode_steps=100)
run_policy(env, get_action)
import torch
import torch.nn as nn
import gym
import gym_pepper  # registers the Pepper environments
from gym.wrappers.time_limit import TimeLimit
# SAC, core_her and replay_buffer_her come from this repo's algos package
# (exact import paths not shown here).

torch.autograd.profiler.profile(enabled=False)

env = TimeLimit(gym.make("PepperPush-v0", gui=True), max_episode_steps=100)

ac_kwargs = dict(hidden_sizes=[64, 64, 64], activation=nn.ReLU)
rb_kwargs = dict(size=1000000, n_sampled_goal=4, goal_selection_strategy='future')
logger_kwargs = dict(output_dir='data/push_su', exp_name='push_su')

model = SAC(env=env,
            actor_critic=core_her.MLPActorCritic,
            ac_kwargs=ac_kwargs,
            replay_buffer=replay_buffer_her.ReplayBuffer,
            rb_kwargs=rb_kwargs,
            max_ep_len=100,
            batch_size=256,
            gamma=0.95,
            lr=0.0003,
            update_after=1000,
            update_every=1,
            logger_kwargs=logger_kwargs)
model.train(steps_per_epoch=2000, epochs=3000)

# Roll out the trained policy.
from algos.test_policy import load_policy_and_env, run_policy

_, get_action = load_policy_and_env('data/push_su')
run_policy(env, get_action)
import torch.nn as nn
import gym
import gym_pepper  # registers the Pepper environments
from gym.wrappers.time_limit import TimeLimit
# SAC, core and replay_buffer come from this repo's algos package
# (exact import paths not shown here).

# NOTE: the environment id is assumed ("PepperReach-v0", matching the
# 'reach_su' run name); adjust if your registered id differs.
env = TimeLimit(
    gym.make("PepperReach-v0", gui=True, dense=True, head_motion=False),
    max_episode_steps=100)

ac_kwargs = dict(hidden_sizes=[256, 256, 256], activation=nn.ReLU)
rb_kwargs = dict(size=1000000)
logger_kwargs = dict(output_dir='data/reach_su', exp_name='reach_su')

model = SAC(env=env,
            actor_critic=core.MLPActorCritic,
            ac_kwargs=ac_kwargs,
            replay_buffer=replay_buffer.ReplayBuffer,
            rb_kwargs=rb_kwargs,
            max_ep_len=100,
            batch_size=256,
            gamma=0.95,
            lr=0.0003,
            ent_coef="auto",
            update_after=1000,
            update_every=1,
            logger_kwargs=logger_kwargs)
model.train(steps_per_epoch=2000, epochs=1000)

# Roll out the trained policy.
from algos.test_policy import load_policy_and_env, run_policy

_, get_action = load_policy_and_env('data/reach_su')
run_policy(env, get_action)
import gym
import gym_pepper
from utils.wrappers import VisionWrapper, TorchifyWrapper
from gym.wrappers.time_limit import TimeLimit
from algos.test_policy import load_policy_and_env, run_policy
from cnn.cnn_0 import Net

env = TimeLimit(
    VisionWrapper(
        TorchifyWrapper(gym.make("PepperReachCam-v0", gui=True, dense=True)),
        Net,
        "trained/vision_0.pth"),
    max_episode_steps=100)

_, get_action = load_policy_and_env("trained/0")
run_policy(env, get_action)
import torch.nn as nn
# SAC, core_cam, replay_buffer_cam and the Extractor feature-extractor module
# come from this repo's algos/cnn packages (exact import paths not shown here).
# `env` is assumed to be a camera-based Pepper environment such as
# PepperReachCam-v0 wrapped in a TimeLimit, as in the snippets above.

ac_kwargs = dict(hidden_sizes=[256, 256],
                 activation=nn.ReLU,
                 extractor_module=Extractor)
rb_kwargs = dict(size=40000)
logger_kwargs = dict(output_dir='data/reach_cam', exp_name='reach_cam')

model = SAC(env=env,
            actor_critic=core_cam.MLPActorCritic,
            ac_kwargs=ac_kwargs,
            replay_buffer=replay_buffer_cam.ReplayBuffer,
            rb_kwargs=rb_kwargs,
            max_ep_len=100,
            batch_size=256,
            gamma=0.95,
            lr=0.0003,
            update_after=512,
            update_every=512,
            logger_kwargs=logger_kwargs,
            use_gpu_buffer=True)
model.train(steps_per_epoch=1024, epochs=5000)

# Roll out the trained policy.
from algos.test_policy import load_policy_and_env, run_policy

_, get_action = load_policy_and_env('data/reach_cam', deterministic=True)
run_policy(env, get_action)
import gym
import torch.nn as nn
# SAC, core_her and replay_buffer_her come from this repo's algos package
# (exact import paths not shown here).

env = gym.make("FetchReach-v1")

ac_kwargs = dict(hidden_sizes=[64, 64], activation=nn.ReLU)
rb_kwargs = dict(size=1000000, n_sampled_goal=4, goal_selection_strategy='future')
logger_kwargs = dict(output_dir='data/fetch_reach_su', exp_name='fetch_reach_su')

model = SAC(env=env,
            actor_critic=core_her.MLPActorCritic,
            ac_kwargs=ac_kwargs,
            replay_buffer=replay_buffer_her.ReplayBuffer,
            rb_kwargs=rb_kwargs,
            max_ep_len=100,
            batch_size=256,
            gamma=0.95,
            lr=0.001,
            ent_coef="auto",
            update_after=1000,
            update_every=1,
            logger_kwargs=logger_kwargs)
model.train(steps_per_epoch=3000, epochs=10)

# Roll out the trained policy.
from algos.test_policy import load_policy_and_env, run_policy

_, get_action = load_policy_and_env('data/fetch_reach_su', deterministic=True)
run_policy(env, get_action)
import gym
from utils import wrappers
from gym.wrappers.time_limit import TimeLimit
from algos.test_policy import load_policy_and_env, run_policy

_, get_action = load_policy_and_env('trained/1')
env = TimeLimit(gym.make("PepperPush-v0", gui=True), max_episode_steps=100)
run_policy(env, get_action)