def test_dummy_vec_env_save_load():
    env_kwargs = dict(
        use_image_obs=True,
        imwidth=64,
        imheight=64,
        reward_type="sparse",
        usage_kwargs=dict(
            max_path_length=5,
            use_dm_backend=True,
            use_raw_action_wrappers=False,
            unflatten_images=False,
        ),
        action_space_kwargs=dict(
            control_mode="primitives",
            action_scale=1,
            camera_settings={
                "distance": 0.38227044687537043,
                "lookat": [0.21052547, 0.32329237, 0.587819],
                "azimuth": 141.328125,
                "elevation": -53.203125160653144,
            },
        ),
    )
    env_suite = "metaworld"
    env_name = "disassemble-v2"
    make_env_lambda = lambda: make_env(env_suite, env_name, env_kwargs)

    n_envs = 2
    envs = [make_env_lambda() for _ in range(n_envs)]
    env = DummyVecEnv(envs)
    with tempfile.TemporaryDirectory() as tmpdirname:
        env.save(tmpdirname, "env.pkl")
        # Rebuild the vec env with fewer envs, then load: load should restore
        # the saved state, including the original number of envs.
        env = DummyVecEnv(envs[0:1])
        new_env = env.load(tmpdirname, "env.pkl")
        assert new_env.n_envs == n_envs
def test_path_collector_save_load():
    env_kwargs = dict(
        use_image_obs=True,
        imwidth=64,
        imheight=64,
        reward_type="sparse",
        usage_kwargs=dict(
            max_path_length=5,
            use_dm_backend=True,
            use_raw_action_wrappers=False,
            unflatten_images=False,
        ),
        action_space_kwargs=dict(
            control_mode="primitives",
            action_scale=1,
            camera_settings={
                "distance": 0.38227044687537043,
                "lookat": [0.21052547, 0.32329237, 0.587819],
                "azimuth": 141.328125,
                "elevation": -53.203125160653144,
            },
        ),
    )
    actor_kwargs = dict(
        discrete_continuous_dist=True,
        init_std=0.0,
        num_layers=4,
        min_std=0.1,
        dist="tanh_normal_dreamer_v1",
    )
    model_kwargs = dict(
        model_hidden_size=400,
        stochastic_state_size=50,
        deterministic_state_size=200,
        rssm_hidden_size=200,
        reward_num_layers=2,
        pred_discount_num_layers=3,
        gru_layer_norm=True,
        std_act="sigmoid2",
        use_prior_instead_of_posterior=False,
    )
    env_suite = "metaworld"
    env_name = "disassemble-v2"
    eval_envs = [make_env(env_suite, env_name, env_kwargs)]
    eval_env = DummyVecEnv(eval_envs)

    discrete_continuous_dist = True
    continuous_action_dim = eval_envs[0].max_arg_len
    discrete_action_dim = eval_envs[0].num_primitives
    if not discrete_continuous_dist:
        continuous_action_dim = continuous_action_dim + discrete_action_dim
        discrete_action_dim = 0
    action_dim = continuous_action_dim + discrete_action_dim
    obs_dim = eval_env.observation_space.low.size

    world_model = WorldModel(
        action_dim,
        image_shape=eval_envs[0].image_shape,
        **model_kwargs,
    )
    actor = ActorModel(
        model_kwargs["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=nn.ELU,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        **actor_kwargs,
    )
    eval_policy = DreamerPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        exploration=False,
        expl_amount=0.0,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=discrete_continuous_dist,
    )
    eval_path_collector = VecMdpPathCollector(
        eval_env,
        eval_policy,
        save_env_in_snapshot=False,
    )
    with tempfile.TemporaryDirectory() as tmpdirname:
        eval_path_collector.save(tmpdirname, "path_collector.pkl")
        eval_path_collector = VecMdpPathCollector(
            eval_env,
            eval_policy,
            save_env_in_snapshot=False,
        )
        new_path_collector = eval_path_collector.load(
            tmpdirname, "path_collector.pkl")
def run_trained_policy(path):
    ptu.set_gpu_mode(True)
    variant = json.load(open(osp.join(path, "variant.json"), "r"))
    set_seed(variant["seed"])
    variant = preprocess_variant_llraps(variant)

    env_suite = variant.get("env_suite", "kitchen")
    env_kwargs = variant["env_kwargs"]
    num_low_level_actions_per_primitive = variant[
        "num_low_level_actions_per_primitive"]
    low_level_action_dim = variant["low_level_action_dim"]
    env_name = variant["env_name"]
    make_env_lambda = lambda: make_env(env_suite, env_name, env_kwargs)
    eval_envs = [make_env_lambda()]
    eval_env = DummyVecEnv(
        eval_envs,
        pass_render_kwargs=variant.get("pass_render_kwargs", False),
    )

    discrete_continuous_dist = variant["actor_kwargs"][
        "discrete_continuous_dist"]
    num_primitives = eval_envs[0].num_primitives
    continuous_action_dim = eval_envs[0].max_arg_len
    discrete_action_dim = num_primitives
    if not discrete_continuous_dist:
        continuous_action_dim = continuous_action_dim + discrete_action_dim
        discrete_action_dim = 0
    action_dim = continuous_action_dim + discrete_action_dim
    obs_dim = eval_env.observation_space.low.size

    primitive_model = Mlp(
        output_size=variant["low_level_action_dim"],
        input_size=variant["model_kwargs"]["stochastic_state_size"] +
        variant["model_kwargs"]["deterministic_state_size"] +
        eval_env.envs[0].action_space.low.shape[0] + 1,
        hidden_activation=nn.ReLU,
        num_embeddings=eval_envs[0].num_primitives,
        embedding_dim=eval_envs[0].num_primitives,
        embedding_slice=eval_envs[0].num_primitives,
        **variant["primitive_model_kwargs"],
    )
    world_model = LowlevelRAPSWorldModel(
        low_level_action_dim,
        image_shape=eval_envs[0].image_shape,
        primitive_model=primitive_model,
        **variant["model_kwargs"],
    )
    actor = ActorModel(
        variant["model_kwargs"]["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=nn.ELU,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        **variant["actor_kwargs"],
    )
    actor.load_state_dict(torch.load(osp.join(path, "actor.ptc")))
    world_model.load_state_dict(torch.load(osp.join(path, "world_model.ptc")))
    actor.to(ptu.device)
    world_model.to(ptu.device)

    eval_policy = DreamerLowLevelRAPSPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        num_low_level_actions_per_primitive=num_low_level_actions_per_primitive,
        low_level_action_dim=low_level_action_dim,
        exploration=False,
        expl_amount=0.0,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=discrete_continuous_dist,
    )

    # Roll out one evaluation episode and return the final step's reward.
    with torch.no_grad():
        with torch.cuda.amp.autocast():
            for step in range(
                    0, variant["algorithm_kwargs"]["max_path_length"] + 1):
                if step == 0:
                    observation = eval_env.envs[0].reset()
                    eval_policy.reset(observation.reshape(1, -1))
                    policy_o = (None, observation.reshape(1, -1))
                    reward = 0
                else:
                    high_level_action, _ = eval_policy.get_action(policy_o)
                    observation, reward, done, info = eval_env.envs[0].step(
                        high_level_action[0])
                    low_level_obs = np.expand_dims(
                        np.array(info["low_level_obs"]), 0)
                    low_level_action = np.expand_dims(
                        np.array(info["low_level_action"]), 0)
                    policy_o = (low_level_action, low_level_obs)
    return reward
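# A hedged usage sketch (not in the original file): run_trained_policy expects
# a run directory containing variant.json plus the actor.ptc and
# world_model.ptc checkpoints it loads above. The path below is a placeholder,
# not a real location.
if __name__ == "__main__":
    checkpoint_dir = "/path/to/llraps/run"  # hypothetical run directory
    final_reward = run_trained_policy(checkpoint_dir)
    print("final step reward:", final_reward)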
def experiment(variant):
    import os
    import os.path as osp

    os.environ["D4RL_SUPPRESS_IMPORT_ERROR"] = "1"
    import torch
    import torch.nn as nn

    import rlkit.envs.primitives_make_env as primitives_make_env
    import rlkit.torch.pytorch_util as ptu
    from rlkit.envs.wrappers.mujoco_vec_wrappers import (
        DummyVecEnv,
        StableBaselinesVecEnv,
    )
    from rlkit.torch.model_based.dreamer.actor_models import ActorModel
    from rlkit.torch.model_based.dreamer.dreamer_policy import (
        ActionSpaceSamplePolicy,
        DreamerPolicy,
    )
    from rlkit.torch.model_based.dreamer.dreamer_v2 import DreamerV2Trainer
    from rlkit.torch.model_based.dreamer.episode_replay_buffer import (
        EpisodeReplayBuffer,
        EpisodeReplayBufferLowLevelRAPS,
    )
    from rlkit.torch.model_based.dreamer.mlp import Mlp
    from rlkit.torch.model_based.dreamer.path_collector import VecMdpPathCollector
    from rlkit.torch.model_based.dreamer.visualization import post_epoch_visualize_func
    from rlkit.torch.model_based.dreamer.world_models import WorldModel
    from rlkit.torch.model_based.rl_algorithm import TorchBatchRLAlgorithm

    env_suite = variant.get("env_suite", "kitchen")
    env_name = variant["env_name"]
    env_kwargs = variant["env_kwargs"]
    use_raw_actions = variant["use_raw_actions"]
    num_expl_envs = variant["num_expl_envs"]
    if num_expl_envs > 1:
        env_fns = [
            lambda: primitives_make_env.make_env(env_suite, env_name, env_kwargs)
            for _ in range(num_expl_envs)
        ]
        expl_env = StableBaselinesVecEnv(
            env_fns=env_fns,
            start_method="fork",
            reload_state_args=(
                num_expl_envs,
                primitives_make_env.make_env,
                (env_suite, env_name, env_kwargs),
            ),
        )
    else:
        expl_envs = [primitives_make_env.make_env(env_suite, env_name, env_kwargs)]
        expl_env = DummyVecEnv(
            expl_envs,
            pass_render_kwargs=variant.get("pass_render_kwargs", False),
        )
    eval_envs = [primitives_make_env.make_env(env_suite, env_name, env_kwargs)]
    eval_env = DummyVecEnv(
        eval_envs,
        pass_render_kwargs=variant.get("pass_render_kwargs", False),
    )

    if use_raw_actions:
        discrete_continuous_dist = False
        continuous_action_dim = eval_env.action_space.low.size
        discrete_action_dim = 0
        use_batch_length = True
        action_dim = continuous_action_dim
    else:
        discrete_continuous_dist = variant["actor_kwargs"][
            "discrete_continuous_dist"]
        continuous_action_dim = eval_envs[0].max_arg_len
        discrete_action_dim = eval_envs[0].num_primitives
        if not discrete_continuous_dist:
            continuous_action_dim = continuous_action_dim + discrete_action_dim
            discrete_action_dim = 0
        action_dim = continuous_action_dim + discrete_action_dim
        use_batch_length = False
    obs_dim = expl_env.observation_space.low.size

    world_model = WorldModel(
        action_dim,
        image_shape=eval_envs[0].image_shape,
        **variant["model_kwargs"],
    )
    actor = ActorModel(
        variant["model_kwargs"]["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=nn.ELU,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        **variant["actor_kwargs"],
    )
    vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )
    target_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )

    if variant.get("models_path", None) is not None:
        filename = variant["models_path"]
        actor.load_state_dict(torch.load(osp.join(filename, "actor.ptc")))
        vf.load_state_dict(torch.load(osp.join(filename, "vf.ptc")))
        target_vf.load_state_dict(
            torch.load(osp.join(filename, "target_vf.ptc")))
        world_model.load_state_dict(
            torch.load(osp.join(filename, "world_model.ptc")))
        print("LOADED MODELS")

    expl_policy = DreamerPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        exploration=True,
        expl_amount=variant.get("expl_amount", 0.3),
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=discrete_continuous_dist,
    )
    eval_policy = DreamerPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        exploration=False,
        expl_amount=0.0,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=discrete_continuous_dist,
    )
    rand_policy = ActionSpaceSamplePolicy(expl_env)

    expl_path_collector = VecMdpPathCollector(
        expl_env,
        expl_policy,
        save_env_in_snapshot=False,
    )
    eval_path_collector = VecMdpPathCollector(
        eval_env,
        eval_policy,
        save_env_in_snapshot=False,
    )

    variant["replay_buffer_kwargs"]["use_batch_length"] = use_batch_length
    replay_buffer = EpisodeReplayBuffer(
        num_expl_envs,
        obs_dim,
        action_dim,
        **variant["replay_buffer_kwargs"],
    )
    eval_filename = variant.get("eval_buffer_path", None)
    if eval_filename is not None:
        # Positional arguments follow the EpisodeReplayBufferLowLevelRAPS
        # signature used elsewhere in this file: capacity, env,
        # max_path_length, num_low_level_actions_per_primitive, obs_dim,
        # action_dim, low_level_action_dim.
        eval_buffer = EpisodeReplayBufferLowLevelRAPS(
            1000,
            expl_env,
            variant["algorithm_kwargs"]["max_path_length"],
            10,
            obs_dim,
            action_dim,
            9,
            replace=False,
        )
        eval_buffer.load_buffer(eval_filename, eval_env.envs[0].num_primitives)
    else:
        eval_buffer = None

    trainer = DreamerV2Trainer(
        actor,
        vf,
        target_vf,
        world_model,
        eval_envs[0].image_shape,
        **variant["trainer_kwargs"],
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        pretrain_policy=rand_policy,
        eval_buffer=eval_buffer,
        **variant["algorithm_kwargs"],
    )
    algorithm.low_level_primitives = False

    if variant.get("generate_video", False):
        post_epoch_visualize_func(algorithm, 0)
    else:
        if variant.get("save_video", False):
            algorithm.post_epoch_funcs.append(post_epoch_visualize_func)
        print("TRAINING")
        algorithm.to(ptu.device)
        algorithm.train()
        if variant.get("save_video", False):
            post_epoch_visualize_func(algorithm, -1)
def experiment(variant):
    import os

    import rlkit.envs.primitives_make_env as primitives_make_env

    os.environ["D4RL_SUPPRESS_IMPORT_ERROR"] = "1"
    import torch

    import rlkit.torch.pytorch_util as ptu
    from rlkit.envs.wrappers.mujoco_vec_wrappers import (
        DummyVecEnv,
        StableBaselinesVecEnv,
    )
    from rlkit.torch.model_based.dreamer.actor_models import ActorModel
    from rlkit.torch.model_based.dreamer.dreamer_policy import (
        ActionSpaceSamplePolicy,
        DreamerPolicy,
    )
    from rlkit.torch.model_based.dreamer.episode_replay_buffer import (
        EpisodeReplayBuffer,
    )
    from rlkit.torch.model_based.dreamer.mlp import Mlp
    from rlkit.torch.model_based.dreamer.path_collector import VecMdpPathCollector
    from rlkit.torch.model_based.dreamer.visualization import video_post_epoch_func
    from rlkit.torch.model_based.dreamer.world_models import WorldModel
    from rlkit.torch.model_based.plan2explore.latent_space_models import (
        OneStepEnsembleModel,
    )
    from rlkit.torch.model_based.plan2explore.plan2explore import Plan2ExploreTrainer
    from rlkit.torch.model_based.rl_algorithm import TorchBatchRLAlgorithm

    env_suite = variant.get("env_suite", "kitchen")
    env_name = variant["env_name"]
    env_kwargs = variant["env_kwargs"]
    use_raw_actions = variant["use_raw_actions"]
    num_expl_envs = variant["num_expl_envs"]
    actor_model_class_name = variant.get("actor_model_class", "actor_model")

    if num_expl_envs > 1:
        env_fns = [
            lambda: primitives_make_env.make_env(env_suite, env_name, env_kwargs)
            for _ in range(num_expl_envs)
        ]
        expl_env = StableBaselinesVecEnv(env_fns=env_fns, start_method="fork")
    else:
        expl_envs = [primitives_make_env.make_env(env_suite, env_name, env_kwargs)]
        expl_env = DummyVecEnv(
            expl_envs,
            pass_render_kwargs=variant.get("pass_render_kwargs", False),
        )
    eval_envs = [primitives_make_env.make_env(env_suite, env_name, env_kwargs)]
    eval_env = DummyVecEnv(
        eval_envs,
        pass_render_kwargs=variant.get("pass_render_kwargs", False),
    )

    if use_raw_actions:
        discrete_continuous_dist = False
        continuous_action_dim = eval_env.action_space.low.size
        discrete_action_dim = 0
        use_batch_length = True
        action_dim = continuous_action_dim
    else:
        discrete_continuous_dist = variant["actor_kwargs"][
            "discrete_continuous_dist"]
        continuous_action_dim = eval_envs[0].max_arg_len
        discrete_action_dim = eval_envs[0].num_primitives
        if not discrete_continuous_dist:
            continuous_action_dim = continuous_action_dim + discrete_action_dim
            discrete_action_dim = 0
        action_dim = continuous_action_dim + discrete_action_dim
        use_batch_length = False

    world_model_class = WorldModel
    obs_dim = expl_env.observation_space.low.size
    actor_model_class = ActorModel

    if variant.get("load_from_path", False):
        data = torch.load(variant["models_path"])
        actor = data["trainer/actor"]
        vf = data["trainer/vf"]
        target_vf = data["trainer/target_vf"]
        world_model = data["trainer/world_model"]
    else:
        world_model = world_model_class(
            action_dim,
            image_shape=eval_envs[0].image_shape,
            **variant["model_kwargs"],
            env=eval_envs[0].env,
        )
        actor = actor_model_class(
            variant["model_kwargs"]["model_hidden_size"],
            world_model.feature_size,
            hidden_activation=torch.nn.functional.elu,
            discrete_action_dim=discrete_action_dim,
            continuous_action_dim=continuous_action_dim,
            env=eval_envs[0].env,
            **variant["actor_kwargs"],
        )
        vf = Mlp(
            hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
            variant["vf_kwargs"]["num_layers"],
            output_size=1,
            input_size=world_model.feature_size,
            hidden_activation=torch.nn.functional.elu,
        )
        target_vf = Mlp(
            hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
            variant["vf_kwargs"]["num_layers"],
            output_size=1,
            input_size=world_model.feature_size,
            hidden_activation=torch.nn.functional.elu,
        )

    one_step_ensemble = OneStepEnsembleModel(
        action_dim=action_dim,
        embedding_size=variant["model_kwargs"]["embedding_size"],
        deterministic_state_size=variant["model_kwargs"]
        ["deterministic_state_size"],
        stochastic_state_size=variant["model_kwargs"]["stochastic_state_size"],
        **variant["one_step_ensemble_kwargs"],
    )
    exploration_actor = actor_model_class(
        variant["model_kwargs"]["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        env=eval_envs[0],
        **variant["actor_kwargs"],
    )
    exploration_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
    )
    exploration_target_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
    )

    if variant.get("expl_with_exploration_actor", True):
        expl_actor = exploration_actor
    else:
        expl_actor = actor
    expl_policy = DreamerPolicy(
        world_model,
        expl_actor,
        obs_dim,
        action_dim,
        exploration=True,
        expl_amount=variant.get("expl_amount", 0.3),
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=variant["actor_kwargs"]
        ["discrete_continuous_dist"],
    )
    if variant.get("eval_with_exploration_actor", False):
        eval_actor = exploration_actor
    else:
        eval_actor = actor
    eval_policy = DreamerPolicy(
        world_model,
        eval_actor,
        obs_dim,
        action_dim,
        exploration=False,
        expl_amount=0.0,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=variant["actor_kwargs"]
        ["discrete_continuous_dist"],
    )
    rand_policy = ActionSpaceSamplePolicy(expl_env)

    expl_path_collector = VecMdpPathCollector(
        expl_env,
        expl_policy,
        save_env_in_snapshot=False,
    )
    eval_path_collector = VecMdpPathCollector(
        eval_env,
        eval_policy,
        save_env_in_snapshot=False,
    )
    replay_buffer = EpisodeReplayBuffer(
        variant["replay_buffer_size"],
        expl_env,
        variant["algorithm_kwargs"]["max_path_length"] + 1,
        obs_dim,
        action_dim,
        replace=False,
        use_batch_length=use_batch_length,
    )
    trainer = Plan2ExploreTrainer(
        eval_env,
        actor,
        vf,
        target_vf,
        world_model,
        eval_envs[0].image_shape,
        exploration_actor,
        exploration_vf,
        exploration_target_vf,
        one_step_ensemble,
        **variant["trainer_kwargs"],
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        pretrain_policy=rand_policy,
        **variant["algorithm_kwargs"],
    )
    algorithm.post_epoch_funcs.append(video_post_epoch_func)
    algorithm.to(ptu.device)
    algorithm.train()
    video_post_epoch_func(algorithm, -1)
def experiment(variant):
    import os

    from rlkit.envs.wrappers.dmc_wrappers import (
        ActionRepeat,
        DeepMindControl,
        NormalizeActions,
        TimeLimit,
    )
    from rlkit.torch.model_based.dreamer.visualization import post_epoch_visualize_func

    os.environ["D4RL_SUPPRESS_IMPORT_ERROR"] = "1"
    import torch
    import torch.nn as nn

    import rlkit.torch.pytorch_util as ptu
    from rlkit.envs.wrappers.mujoco_vec_wrappers import DummyVecEnv
    from rlkit.torch.model_based.dreamer.actor_models import ActorModel
    from rlkit.torch.model_based.dreamer.dreamer import DreamerTrainer
    from rlkit.torch.model_based.dreamer.dreamer_policy import (
        ActionSpaceSamplePolicy,
        DreamerPolicy,
    )
    from rlkit.torch.model_based.dreamer.dreamer_v2 import DreamerV2Trainer
    from rlkit.torch.model_based.dreamer.episode_replay_buffer import (
        EpisodeReplayBuffer,
    )
    from rlkit.torch.model_based.dreamer.mlp import Mlp
    from rlkit.torch.model_based.dreamer.path_collector import VecMdpPathCollector
    from rlkit.torch.model_based.dreamer.world_models import WorldModel
    from rlkit.torch.model_based.rl_algorithm import TorchBatchRLAlgorithm

    # DMC setup: action repeat of 2 and a 500-step time limit.
    expl_env = DeepMindControl(variant["env_id"])
    expl_env.reset()
    expl_env = ActionRepeat(expl_env, 2)
    expl_env = NormalizeActions(expl_env)
    expl_env = DummyVecEnv([TimeLimit(expl_env, 500)], pass_render_kwargs=False)

    eval_env = DeepMindControl(variant["env_id"])
    eval_env.reset()
    eval_env = ActionRepeat(eval_env, 2)
    eval_env = NormalizeActions(eval_env)
    eval_env = DummyVecEnv([TimeLimit(eval_env, 500)], pass_render_kwargs=False)

    obs_dim = expl_env.observation_space.low.size
    action_dim = expl_env.action_space.low.size

    world_model_class = WorldModel
    world_model = world_model_class(
        action_dim,
        image_shape=(3, 64, 64),
        **variant["model_kwargs"],
    )
    actor = ActorModel(
        variant["model_kwargs"]["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=nn.ELU,
        discrete_action_dim=0,
        continuous_action_dim=eval_env.action_space.low.size,
        **variant["actor_kwargs"],
    )
    vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )
    target_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )
    variant["trainer_kwargs"]["target_vf"] = target_vf

    expl_policy = DreamerPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        exploration=True,
        expl_amount=variant.get("expl_amount", 0.3),
        discrete_action_dim=0,
        continuous_action_dim=eval_env.action_space.low.size,
    )
    eval_policy = DreamerPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        exploration=False,
        expl_amount=0.0,
        discrete_action_dim=0,
        continuous_action_dim=eval_env.action_space.low.size,
    )
    rand_policy = ActionSpaceSamplePolicy(expl_env)

    expl_path_collector = VecMdpPathCollector(
        expl_env,
        expl_policy,
        save_env_in_snapshot=False,
        env_params={},
        env_class={},
    )
    eval_path_collector = VecMdpPathCollector(
        eval_env,
        eval_policy,
        save_env_in_snapshot=False,
        env_params={},
        env_class={},
    )
    replay_buffer = EpisodeReplayBuffer(
        1,
        obs_dim,
        action_dim,
        variant["replay_buffer_size"],
        500,
        replace=False,
        use_batch_length=True,
        batch_length=50,
    )

    trainer_class_name = variant.get("algorithm", "DreamerV2")
    if trainer_class_name == "DreamerV2":
        trainer_class = DreamerV2Trainer
    else:
        trainer_class = DreamerTrainer
    trainer = trainer_class(
        world_model=world_model,
        actor=actor,
        vf=vf,
        image_shape=(3, 64, 64),
        **variant["trainer_kwargs"],
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        pretrain_policy=rand_policy,
        **variant["algorithm_kwargs"],
    )
    print("TRAINING")
    algorithm.to(ptu.device)
    algorithm.train()
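# Illustrative variant skeleton for the DMC experiment above, inferred from
# the keys experiment() reads; the values are placeholders, not tuned
# settings.
example_dmc_variant = dict(
    env_id="cheetah_run",  # any dm_control task id accepted by DeepMindControl
    algorithm="DreamerV2",  # any other value selects DreamerTrainer instead
    expl_amount=0.3,
    replay_buffer_size=1000,
    model_kwargs=dict(model_hidden_size=400),  # plus the remaining WorldModel kwargs
    actor_kwargs=dict(),  # ActorModel kwargs
    vf_kwargs=dict(num_layers=3),
    trainer_kwargs=dict(),  # target_vf is injected by experiment() itself
    algorithm_kwargs=dict(),  # TorchBatchRLAlgorithm kwargs
)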
def test_trainer_save_load():
    env_kwargs = dict(
        use_image_obs=True,
        imwidth=64,
        imheight=64,
        reward_type="sparse",
        usage_kwargs=dict(
            max_path_length=5,
            use_dm_backend=True,
            use_raw_action_wrappers=False,
            unflatten_images=False,
        ),
        action_space_kwargs=dict(
            control_mode="primitives",
            action_scale=1,
            camera_settings={
                "distance": 0.38227044687537043,
                "lookat": [0.21052547, 0.32329237, 0.587819],
                "azimuth": 141.328125,
                "elevation": -53.203125160653144,
            },
        ),
    )
    actor_kwargs = dict(
        discrete_continuous_dist=True,
        init_std=0.0,
        num_layers=4,
        min_std=0.1,
        dist="tanh_normal_dreamer_v1",
    )
    vf_kwargs = dict(num_layers=3)
    model_kwargs = dict(
        model_hidden_size=400,
        stochastic_state_size=50,
        deterministic_state_size=200,
        rssm_hidden_size=200,
        reward_num_layers=2,
        pred_discount_num_layers=3,
        gru_layer_norm=True,
        std_act="sigmoid2",
        use_prior_instead_of_posterior=False,
    )
    trainer_kwargs = dict(
        adam_eps=1e-5,
        discount=0.8,
        lam=0.95,
        forward_kl=False,
        free_nats=1.0,
        pred_discount_loss_scale=10.0,
        kl_loss_scale=0.0,
        transition_loss_scale=0.8,
        actor_lr=8e-5,
        vf_lr=8e-5,
        world_model_lr=3e-4,
        reward_loss_scale=2.0,
        use_pred_discount=True,
        policy_gradient_loss_scale=1.0,
        actor_entropy_loss_schedule="1e-4",
        target_update_period=100,
        detach_rewards=False,
        imagination_horizon=5,
    )
    env_suite = "metaworld"
    env_name = "disassemble-v2"
    eval_envs = [make_env(env_suite, env_name, env_kwargs)]
    eval_env = DummyVecEnv(eval_envs)

    discrete_continuous_dist = True
    continuous_action_dim = eval_envs[0].max_arg_len
    discrete_action_dim = eval_envs[0].num_primitives
    if not discrete_continuous_dist:
        continuous_action_dim = continuous_action_dim + discrete_action_dim
        discrete_action_dim = 0
    action_dim = continuous_action_dim + discrete_action_dim

    world_model = WorldModel(
        action_dim,
        image_shape=eval_envs[0].image_shape,
        **model_kwargs,
    )
    actor = ActorModel(
        model_kwargs["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=nn.ELU,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        **actor_kwargs,
    )
    vf = Mlp(
        hidden_sizes=[model_kwargs["model_hidden_size"]] *
        vf_kwargs["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )
    target_vf = Mlp(
        hidden_sizes=[model_kwargs["model_hidden_size"]] *
        vf_kwargs["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )
    trainer = DreamerV2Trainer(
        actor,
        vf,
        target_vf,
        world_model,
        eval_envs[0].image_shape,
        **trainer_kwargs,
    )
    with tempfile.TemporaryDirectory() as tmpdirname:
        trainer.save(tmpdirname, "trainer.pkl")
        trainer = DreamerV2Trainer(
            actor,
            vf,
            target_vf,
            world_model,
            eval_envs[0].image_shape,
            **trainer_kwargs,
        )
        new_trainer = trainer.load(tmpdirname, "trainer.pkl")
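# The action-dimension bookkeeping above repeats in every experiment in this
# file. A minimal standalone sketch of the same rule (hypothetical helper,
# not called by the code above): with a mixed discrete/continuous
# distribution the discrete head selects a primitive and the continuous head
# outputs its arguments; otherwise everything is folded into one flat
# continuous action vector.
def compute_action_dims(max_arg_len, num_primitives, discrete_continuous_dist):
    continuous_action_dim = max_arg_len
    discrete_action_dim = num_primitives
    if not discrete_continuous_dist:
        continuous_action_dim += discrete_action_dim
        discrete_action_dim = 0
    return (
        continuous_action_dim,
        discrete_action_dim,
        continuous_action_dim + discrete_action_dim,
    )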
def experiment(variant):
    import os
    import os.path as osp

    os.environ["D4RL_SUPPRESS_IMPORT_ERROR"] = "1"
    import torch
    import torch.nn as nn

    import rlkit.torch.pytorch_util as ptu
    from rlkit.core import logger
    from rlkit.envs.primitives_make_env import make_env
    from rlkit.envs.wrappers.mujoco_vec_wrappers import (
        DummyVecEnv,
        StableBaselinesVecEnv,
    )
    from rlkit.torch.model_based.dreamer.actor_models import ActorModel
    from rlkit.torch.model_based.dreamer.dreamer_policy import (
        ActionSpaceSamplePolicy,
        DreamerLowLevelRAPSPolicy,
    )
    from rlkit.torch.model_based.dreamer.dreamer_v2 import DreamerV2LowLevelRAPSTrainer
    from rlkit.torch.model_based.dreamer.episode_replay_buffer import (
        EpisodeReplayBufferLowLevelRAPS,
    )
    from rlkit.torch.model_based.dreamer.mlp import Mlp
    from rlkit.torch.model_based.dreamer.path_collector import VecMdpPathCollector
    from rlkit.torch.model_based.dreamer.rollout_functions import (
        vec_rollout_low_level_raps,
    )
    from rlkit.torch.model_based.dreamer.visualization import (
        post_epoch_visualize_func,
        visualize_primitive_unsubsampled_rollout,
    )
    from rlkit.torch.model_based.dreamer.world_models import LowlevelRAPSWorldModel
    from rlkit.torch.model_based.rl_algorithm import TorchBatchRLAlgorithm

    env_suite = variant.get("env_suite", "kitchen")
    env_kwargs = variant["env_kwargs"]
    num_expl_envs = variant["num_expl_envs"]
    num_low_level_actions_per_primitive = variant[
        "num_low_level_actions_per_primitive"]
    low_level_action_dim = variant["low_level_action_dim"]

    print("MAKING ENVS")
    env_name = variant["env_name"]
    make_env_lambda = lambda: make_env(env_suite, env_name, env_kwargs)
    if num_expl_envs > 1:
        env_fns = [make_env_lambda for _ in range(num_expl_envs)]
        expl_env = StableBaselinesVecEnv(
            env_fns=env_fns,
            start_method="fork",
            reload_state_args=(
                num_expl_envs,
                make_env,
                (env_suite, env_name, env_kwargs),
            ),
        )
    else:
        expl_envs = [make_env_lambda()]
        expl_env = DummyVecEnv(
            expl_envs,
            pass_render_kwargs=variant.get("pass_render_kwargs", False),
        )
    eval_envs = [make_env_lambda()]
    eval_env = DummyVecEnv(
        eval_envs,
        pass_render_kwargs=variant.get("pass_render_kwargs", False),
    )

    discrete_continuous_dist = variant["actor_kwargs"][
        "discrete_continuous_dist"]
    num_primitives = eval_envs[0].num_primitives
    continuous_action_dim = eval_envs[0].max_arg_len
    discrete_action_dim = num_primitives
    if not discrete_continuous_dist:
        continuous_action_dim = continuous_action_dim + discrete_action_dim
        discrete_action_dim = 0
    action_dim = continuous_action_dim + discrete_action_dim
    obs_dim = expl_env.observation_space.low.size

    primitive_model = Mlp(
        output_size=variant["low_level_action_dim"],
        input_size=variant["model_kwargs"]["stochastic_state_size"] +
        variant["model_kwargs"]["deterministic_state_size"] +
        eval_env.envs[0].action_space.low.shape[0] + 1,
        hidden_activation=nn.ReLU,
        num_embeddings=eval_envs[0].num_primitives,
        embedding_dim=eval_envs[0].num_primitives,
        embedding_slice=eval_envs[0].num_primitives,
        **variant["primitive_model_kwargs"],
    )
    world_model = LowlevelRAPSWorldModel(
        low_level_action_dim,
        image_shape=eval_envs[0].image_shape,
        primitive_model=primitive_model,
        **variant["model_kwargs"],
    )
    actor = ActorModel(
        variant["model_kwargs"]["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=nn.ELU,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        **variant["actor_kwargs"],
    )
    vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )
    target_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )

    if variant.get("models_path", None) is not None:
        filename = variant["models_path"]
        actor.load_state_dict(torch.load(osp.join(filename, "actor.ptc")))
        vf.load_state_dict(torch.load(osp.join(filename, "vf.ptc")))
        target_vf.load_state_dict(
            torch.load(osp.join(filename, "target_vf.ptc")))
        world_model.load_state_dict(
            torch.load(osp.join(filename, "world_model.ptc")))
        print("LOADED MODELS")

    expl_policy = DreamerLowLevelRAPSPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        num_low_level_actions_per_primitive=num_low_level_actions_per_primitive,
        low_level_action_dim=low_level_action_dim,
        exploration=True,
        expl_amount=variant.get("expl_amount", 0.3),
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=discrete_continuous_dist,
    )
    eval_policy = DreamerLowLevelRAPSPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        num_low_level_actions_per_primitive=num_low_level_actions_per_primitive,
        low_level_action_dim=low_level_action_dim,
        exploration=False,
        expl_amount=0.0,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=discrete_continuous_dist,
    )
    initial_data_collection_policy = ActionSpaceSamplePolicy(expl_env)

    rollout_function_kwargs = dict(
        num_low_level_actions_per_primitive=num_low_level_actions_per_primitive,
        low_level_action_dim=low_level_action_dim,
        num_primitives=num_primitives,
    )
    expl_path_collector = VecMdpPathCollector(
        expl_env,
        expl_policy,
        save_env_in_snapshot=False,
        rollout_fn=vec_rollout_low_level_raps,
        rollout_function_kwargs=rollout_function_kwargs,
    )
    eval_path_collector = VecMdpPathCollector(
        eval_env,
        eval_policy,
        save_env_in_snapshot=False,
        rollout_fn=vec_rollout_low_level_raps,
        rollout_function_kwargs=rollout_function_kwargs,
    )

    replay_buffer = EpisodeReplayBufferLowLevelRAPS(
        num_expl_envs,
        obs_dim,
        action_dim,
        **variant["replay_buffer_kwargs"],
    )
    filename = variant.get("replay_buffer_path", None)
    if filename is not None:
        replay_buffer.load_buffer(filename, eval_env.envs[0].num_primitives)
    eval_filename = variant.get("eval_buffer_path", None)
    if eval_filename is not None:
        eval_buffer = EpisodeReplayBufferLowLevelRAPS(
            1000,
            expl_env,
            variant["algorithm_kwargs"]["max_path_length"],
            num_low_level_actions_per_primitive,
            obs_dim,
            action_dim,
            low_level_action_dim,
            replace=False,
        )
        eval_buffer.load_buffer(eval_filename, eval_env.envs[0].num_primitives)
    else:
        eval_buffer = None

    trainer = DreamerV2LowLevelRAPSTrainer(
        actor,
        vf,
        target_vf,
        world_model,
        eval_envs[0].image_shape,
        **variant["trainer_kwargs"],
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        pretrain_policy=initial_data_collection_policy,
        eval_buffer=eval_buffer,
        **variant["algorithm_kwargs"],
    )
    algorithm.low_level_primitives = True

    if variant.get("generate_video", False):
        post_epoch_visualize_func(algorithm, 0)
    elif variant.get("unsubsampled_rollout", False):
        visualize_primitive_unsubsampled_rollout(
            make_env_lambda(),
            make_env_lambda(),
            make_env_lambda(),
            logger.get_snapshot_dir(),
            algorithm.max_path_length,
            num_low_level_actions_per_primitive,
            policy=eval_policy,
            img_size=64,
            num_rollouts=4,
        )
    else:
        if variant.get("save_video", False):
            algorithm.post_epoch_funcs.append(post_epoch_visualize_func)
        print("TRAINING")
        algorithm.to(ptu.device)
        algorithm.train()
        if variant.get("save_video", False):
            post_epoch_visualize_func(algorithm, -1)
def experiment(variant):
    import os

    os.environ["D4RL_SUPPRESS_IMPORT_ERROR"] = "1"
    import torch

    import rlkit.torch.pytorch_util as ptu
    # These DMC wrappers are used below but were never imported in the
    # original; they come from the same module used by the Dreamer DMC
    # experiment above.
    from rlkit.envs.wrappers.dmc_wrappers import (
        ActionRepeat,
        DeepMindControl,
        NormalizeActions,
        TimeLimit,
    )
    from rlkit.envs.wrappers.mujoco_vec_wrappers import DummyVecEnv
    from rlkit.torch.model_based.dreamer.actor_models import ActorModel
    from rlkit.torch.model_based.dreamer.dreamer_policy import (
        ActionSpaceSamplePolicy,
        DreamerPolicy,
    )
    from rlkit.torch.model_based.dreamer.episode_replay_buffer import (
        EpisodeReplayBuffer,
    )
    from rlkit.torch.model_based.dreamer.mlp import Mlp
    from rlkit.torch.model_based.dreamer.path_collector import VecMdpPathCollector
    from rlkit.torch.model_based.dreamer.world_models import WorldModel
    from rlkit.torch.model_based.plan2explore.latent_space_models import (
        OneStepEnsembleModel,
    )
    from rlkit.torch.model_based.plan2explore.plan2explore import Plan2ExploreTrainer
    from rlkit.torch.model_based.rl_algorithm import TorchBatchRLAlgorithm

    expl_env = DeepMindControl(variant["env_id"])
    expl_env.reset()
    expl_env = ActionRepeat(expl_env, 2)
    expl_env = NormalizeActions(expl_env)
    expl_env = DummyVecEnv([TimeLimit(expl_env, 500)], pass_render_kwargs=False)

    eval_env = DeepMindControl(variant["env_id"])
    eval_env.reset()
    eval_env = ActionRepeat(eval_env, 2)
    eval_env = NormalizeActions(eval_env)
    eval_env = DummyVecEnv([TimeLimit(eval_env, 500)], pass_render_kwargs=False)

    obs_dim = expl_env.observation_space.low.size
    action_dim = expl_env.action_space.low.size

    world_model_class = WorldModel
    world_model = world_model_class(
        action_dim,
        image_shape=(3, 64, 64),
        **variant["model_kwargs"],
        env=eval_env,
    )
    actor_model_class = ActorModel
    eval_actor = actor_model_class(
        variant["model_kwargs"]["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
        discrete_action_dim=0,
        continuous_action_dim=eval_env.action_space.low.size,
        env=eval_env,
        **variant["actor_kwargs"],
    )
    vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
    )
    target_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
    )
    variant["trainer_kwargs"]["target_vf"] = target_vf

    one_step_ensemble = OneStepEnsembleModel(
        action_dim=action_dim,
        embedding_size=variant["model_kwargs"]["embedding_size"],
        deterministic_state_size=variant["model_kwargs"]
        ["deterministic_state_size"],
        stochastic_state_size=variant["model_kwargs"]["stochastic_state_size"],
        **variant["one_step_ensemble_kwargs"],
    )
    exploration_actor = actor_model_class(
        variant["model_kwargs"]["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
        discrete_action_dim=0,
        continuous_action_dim=eval_env.action_space.low.size,
        env=eval_env,
        **variant["actor_kwargs"],
    )
    exploration_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] * 3,
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
    )
    exploration_target_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] * 3,
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
    )
    variant["trainer_kwargs"]["exploration_target_vf"] = exploration_target_vf

    expl_policy = DreamerPolicy(
        world_model,
        exploration_actor,
        obs_dim,
        action_dim,
        exploration=True,
        expl_amount=variant.get("expl_amount", 0.3),
        discrete_action_dim=0,
        continuous_action_dim=eval_env.action_space.low.size,
    )
    eval_policy = DreamerPolicy(
        world_model,
        eval_actor,
        obs_dim,
        action_dim,
        exploration=False,
        expl_amount=0.0,
        discrete_action_dim=0,
        continuous_action_dim=eval_env.action_space.low.size,
    )
    rand_policy = ActionSpaceSamplePolicy(expl_env)

    expl_path_collector = VecMdpPathCollector(
        expl_env,
        expl_policy,
        save_env_in_snapshot=False,
        env_params={},
        env_class={},
    )
    eval_path_collector = VecMdpPathCollector(
        eval_env,
        eval_policy,
        save_env_in_snapshot=False,
        env_params={},
        env_class={},
    )
    replay_buffer = EpisodeReplayBuffer(
        variant["replay_buffer_size"],
        expl_env,
        501,
        obs_dim,
        action_dim,
        replace=False,
        use_batch_length=True,
        batch_length=50,
    )
    trainer = Plan2ExploreTrainer(
        env=eval_env,
        world_model=world_model,
        actor=eval_actor,
        vf=vf,
        image_shape=(3, 64, 64),
        one_step_ensemble=one_step_ensemble,
        exploration_actor=exploration_actor,
        exploration_vf=exploration_vf,
        **variant["trainer_kwargs"],
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        pretrain_policy=rand_policy,
        **variant["algorithm_kwargs"],
    )
    algorithm.to(ptu.device)
    algorithm.train()