Code example #1
File: test_envs.py  Project: mihdalal/rlkit
# Imports assumed for this excerpt (rlkit module paths as used in the other
# examples on this page):
import tempfile

from rlkit.envs.primitives_make_env import make_env
from rlkit.envs.wrappers.mujoco_vec_wrappers import DummyVecEnv


def test_dummy_vec_env_save_load():
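    # Round-trip a two-env DummyVecEnv through save()/load() in a temporary
    # directory and check that the reloaded env still reports the original n_envs.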
    env_kwargs = dict(
        use_image_obs=True,
        imwidth=64,
        imheight=64,
        reward_type="sparse",
        usage_kwargs=dict(
            max_path_length=5,
            use_dm_backend=True,
            use_raw_action_wrappers=False,
            unflatten_images=False,
        ),
        action_space_kwargs=dict(
            control_mode="primitives",
            action_scale=1,
            camera_settings={
                "distance": 0.38227044687537043,
                "lookat": [0.21052547, 0.32329237, 0.587819],
                "azimuth": 141.328125,
                "elevation": -53.203125160653144,
            },
        ),
    )
    env_suite = "metaworld"
    env_name = "disassemble-v2"
    make_env_lambda = lambda: make_env(env_suite, env_name, env_kwargs)

    n_envs = 2
    envs = [make_env_lambda() for _ in range(n_envs)]
    env = DummyVecEnv(
        envs,
    )
    with tempfile.TemporaryDirectory() as tmpdirname:
        env.save(tmpdirname, "env.pkl")
        env = DummyVecEnv(
            envs[0:1],
        )
        new_env = env.load(tmpdirname, "env.pkl")
    assert new_env.n_envs == n_envs
Code example #2
# Imports assumed for this excerpt (rlkit module paths as used in the other
# examples on this page):
import tempfile

import torch.nn as nn

from rlkit.envs.primitives_make_env import make_env
from rlkit.envs.wrappers.mujoco_vec_wrappers import DummyVecEnv
from rlkit.torch.model_based.dreamer.actor_models import ActorModel
from rlkit.torch.model_based.dreamer.dreamer_policy import DreamerPolicy
from rlkit.torch.model_based.dreamer.path_collector import VecMdpPathCollector
from rlkit.torch.model_based.dreamer.world_models import WorldModel


def test_path_collector_save_load():
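    # Build a Dreamer world model, actor, and eval policy for a single metaworld
    # env, then round-trip a VecMdpPathCollector through save()/load() in a
    # temporary directory.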
    env_kwargs = dict(
        use_image_obs=True,
        imwidth=64,
        imheight=64,
        reward_type="sparse",
        usage_kwargs=dict(
            max_path_length=5,
            use_dm_backend=True,
            use_raw_action_wrappers=False,
            unflatten_images=False,
        ),
        action_space_kwargs=dict(
            control_mode="primitives",
            action_scale=1,
            camera_settings={
                "distance": 0.38227044687537043,
                "lookat": [0.21052547, 0.32329237, 0.587819],
                "azimuth": 141.328125,
                "elevation": -53.203125160653144,
            },
        ),
    )
    actor_kwargs = dict(
        discrete_continuous_dist=True,
        init_std=0.0,
        num_layers=4,
        min_std=0.1,
        dist="tanh_normal_dreamer_v1",
    )
    model_kwargs = dict(
        model_hidden_size=400,
        stochastic_state_size=50,
        deterministic_state_size=200,
        rssm_hidden_size=200,
        reward_num_layers=2,
        pred_discount_num_layers=3,
        gru_layer_norm=True,
        std_act="sigmoid2",
        use_prior_instead_of_posterior=False,
    )
    env_suite = "metaworld"
    env_name = "disassemble-v2"
    eval_envs = [make_env(env_suite, env_name, env_kwargs) for _ in range(1)]
    eval_env = DummyVecEnv(eval_envs, )

    discrete_continuous_dist = True
    continuous_action_dim = eval_envs[0].max_arg_len
    discrete_action_dim = eval_envs[0].num_primitives
    if not discrete_continuous_dist:
        continuous_action_dim = continuous_action_dim + discrete_action_dim
        discrete_action_dim = 0
    action_dim = continuous_action_dim + discrete_action_dim
    obs_dim = eval_env.observation_space.low.size

    world_model = WorldModel(
        action_dim,
        image_shape=eval_envs[0].image_shape,
        **model_kwargs,
    )
    actor = ActorModel(
        model_kwargs["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=nn.ELU,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        **actor_kwargs,
    )

    eval_policy = DreamerPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        exploration=False,
        expl_amount=0.0,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=discrete_continuous_dist,
    )

    eval_path_collector = VecMdpPathCollector(
        eval_env,
        eval_policy,
        save_env_in_snapshot=False,
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        eval_path_collector.save(tmpdirname, "path_collector.pkl")
        eval_path_collector = VecMdpPathCollector(
            eval_env,
            eval_policy,
            save_env_in_snapshot=False,
        )
        new_path_collector = eval_path_collector.load(tmpdirname,
                                                      "path_collector.pkl")
Code example #3
# Imports assumed for this excerpt (rlkit module paths as used in the other
# examples on this page). set_seed and preprocess_variant_llraps come from the
# project's launcher/experiment utilities; their import paths are not shown here.
import json
import os.path as osp

import numpy as np
import torch
import torch.nn as nn

import rlkit.torch.pytorch_util as ptu
from rlkit.envs.primitives_make_env import make_env
from rlkit.envs.wrappers.mujoco_vec_wrappers import DummyVecEnv
from rlkit.torch.model_based.dreamer.actor_models import ActorModel
from rlkit.torch.model_based.dreamer.dreamer_policy import DreamerLowLevelRAPSPolicy
from rlkit.torch.model_based.dreamer.mlp import Mlp
from rlkit.torch.model_based.dreamer.world_models import LowlevelRAPSWorldModel


def run_trained_policy(path):
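    # Rebuild a trained low-level RAPS policy from a results directory
    # (variant.json plus actor.ptc / world_model.ptc checkpoints), roll it out
    # for one episode under autocast on the GPU, and return the final reward.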
    ptu.set_gpu_mode(True)
    variant = json.load(open(osp.join(path, "variant.json"), "r"))
    set_seed(variant["seed"])
    variant = preprocess_variant_llraps(variant)
    env_suite = variant.get("env_suite", "kitchen")
    env_kwargs = variant["env_kwargs"]
    num_low_level_actions_per_primitive = variant[
        "num_low_level_actions_per_primitive"]
    low_level_action_dim = variant["low_level_action_dim"]

    env_name = variant["env_name"]
    make_env_lambda = lambda: make_env(env_suite, env_name, env_kwargs)

    eval_envs = [make_env_lambda() for _ in range(1)]
    eval_env = DummyVecEnv(eval_envs,
                           pass_render_kwargs=variant.get(
                               "pass_render_kwargs", False))

    discrete_continuous_dist = variant["actor_kwargs"][
        "discrete_continuous_dist"]
    num_primitives = eval_envs[0].num_primitives
    continuous_action_dim = eval_envs[0].max_arg_len
    discrete_action_dim = num_primitives
    if not discrete_continuous_dist:
        continuous_action_dim = continuous_action_dim + discrete_action_dim
        discrete_action_dim = 0
    action_dim = continuous_action_dim + discrete_action_dim
    obs_dim = eval_env.observation_space.low.size

    primitive_model = Mlp(
        output_size=variant["low_level_action_dim"],
        input_size=variant["model_kwargs"]["stochastic_state_size"] +
        variant["model_kwargs"]["deterministic_state_size"] +
        eval_env.envs[0].action_space.low.shape[0] + 1,
        hidden_activation=nn.ReLU,
        num_embeddings=eval_envs[0].num_primitives,
        embedding_dim=eval_envs[0].num_primitives,
        embedding_slice=eval_envs[0].num_primitives,
        **variant["primitive_model_kwargs"],
    )

    world_model = LowlevelRAPSWorldModel(
        low_level_action_dim,
        image_shape=eval_envs[0].image_shape,
        primitive_model=primitive_model,
        **variant["model_kwargs"],
    )
    actor = ActorModel(
        variant["model_kwargs"]["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=nn.ELU,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        **variant["actor_kwargs"],
    )
    actor.load_state_dict(torch.load(osp.join(path, "actor.ptc")))
    world_model.load_state_dict(torch.load(osp.join(path, "world_model.ptc")))

    actor.to(ptu.device)
    world_model.to(ptu.device)

    eval_policy = DreamerLowLevelRAPSPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        num_low_level_actions_per_primitive=num_low_level_actions_per_primitive,
        low_level_action_dim=low_level_action_dim,
        exploration=False,
        expl_amount=0.0,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=discrete_continuous_dist,
    )
    with torch.no_grad():
        with torch.cuda.amp.autocast():
            for step in range(
                    0, variant["algorithm_kwargs"]["max_path_length"] + 1):
                if step == 0:
                    observation = eval_env.envs[0].reset()
                    eval_policy.reset(observation.reshape(1, -1))
                    policy_o = (None, observation.reshape(1, -1))
                    reward = 0
                else:
                    high_level_action, _ = eval_policy.get_action(policy_o, )
                    observation, reward, done, info = eval_env.envs[0].step(
                        high_level_action[0], )
                    low_level_obs = np.expand_dims(
                        np.array(info["low_level_obs"]), 0)
                    low_level_action = np.expand_dims(
                        np.array(info["low_level_action"]), 0)
                    policy_o = (low_level_action, low_level_obs)
    return reward
Code example #4
File: raps_experiment.py  Project: mihdalal/rlkit
def experiment(variant):
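    # RAPS experiment: build exploration/evaluation vec envs (StableBaselinesVecEnv
    # when num_expl_envs > 1), a DreamerV2 world model, actor, and value networks,
    # optionally warm-start them from .ptc checkpoints, then either generate a
    # video or train with TorchBatchRLAlgorithm.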
    import os
    import os.path as osp

    os.environ["D4RL_SUPPRESS_IMPORT_ERROR"] = "1"

    import torch
    import torch.nn as nn

    import rlkit.envs.primitives_make_env as primitives_make_env
    import rlkit.torch.pytorch_util as ptu
    from rlkit.envs.wrappers.mujoco_vec_wrappers import (
        DummyVecEnv,
        StableBaselinesVecEnv,
    )
    from rlkit.torch.model_based.dreamer.actor_models import ActorModel
    from rlkit.torch.model_based.dreamer.dreamer_policy import (
        ActionSpaceSamplePolicy,
        DreamerPolicy,
    )
    from rlkit.torch.model_based.dreamer.dreamer_v2 import DreamerV2Trainer
    from rlkit.torch.model_based.dreamer.episode_replay_buffer import (
        EpisodeReplayBuffer,
        EpisodeReplayBufferLowLevelRAPS,
    )
    from rlkit.torch.model_based.dreamer.mlp import Mlp
    from rlkit.torch.model_based.dreamer.path_collector import VecMdpPathCollector
    from rlkit.torch.model_based.dreamer.visualization import post_epoch_visualize_func
    from rlkit.torch.model_based.dreamer.world_models import WorldModel
    from rlkit.torch.model_based.rl_algorithm import TorchBatchRLAlgorithm

    env_suite = variant.get("env_suite", "kitchen")
    env_name = variant["env_name"]
    env_kwargs = variant["env_kwargs"]
    use_raw_actions = variant["use_raw_actions"]
    num_expl_envs = variant["num_expl_envs"]
    if num_expl_envs > 1:
        env_fns = [
            lambda: primitives_make_env.make_env(
                env_suite, env_name, env_kwargs) for _ in range(num_expl_envs)
        ]
        expl_env = StableBaselinesVecEnv(
            env_fns=env_fns,
            start_method="fork",
            reload_state_args=(
                num_expl_envs,
                primitives_make_env.make_env,
                (env_suite, env_name, env_kwargs),
            ),
        )
    else:
        expl_envs = [
            primitives_make_env.make_env(env_suite, env_name, env_kwargs)
        ]
        expl_env = DummyVecEnv(expl_envs,
                               pass_render_kwargs=variant.get(
                                   "pass_render_kwargs", False))
    eval_envs = [
        primitives_make_env.make_env(env_suite, env_name, env_kwargs)
        for _ in range(1)
    ]
    eval_env = DummyVecEnv(eval_envs,
                           pass_render_kwargs=variant.get(
                               "pass_render_kwargs", False))
    if use_raw_actions:
        discrete_continuous_dist = False
        continuous_action_dim = eval_env.action_space.low.size
        discrete_action_dim = 0
        use_batch_length = True
        action_dim = continuous_action_dim
    else:
        discrete_continuous_dist = variant["actor_kwargs"][
            "discrete_continuous_dist"]
        continuous_action_dim = eval_envs[0].max_arg_len
        discrete_action_dim = eval_envs[0].num_primitives
        if not discrete_continuous_dist:
            continuous_action_dim = continuous_action_dim + discrete_action_dim
            discrete_action_dim = 0
        action_dim = continuous_action_dim + discrete_action_dim
        use_batch_length = False
    obs_dim = expl_env.observation_space.low.size

    world_model = WorldModel(
        action_dim,
        image_shape=eval_envs[0].image_shape,
        **variant["model_kwargs"],
    )
    actor = ActorModel(
        variant["model_kwargs"]["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=nn.ELU,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        **variant["actor_kwargs"],
    )
    vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )
    target_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )
    if variant.get("models_path", None) is not None:
        filename = variant["models_path"]
        actor.load_state_dict(torch.load(osp.join(filename, "actor.ptc")))
        vf.load_state_dict(torch.load(osp.join(filename, "vf.ptc")))
        target_vf.load_state_dict(
            torch.load(osp.join(filename, "target_vf.ptc")))
        world_model.load_state_dict(
            torch.load(osp.join(filename, "world_model.ptc")))
        print("LOADED MODELS")

    expl_policy = DreamerPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        exploration=True,
        expl_amount=variant.get("expl_amount", 0.3),
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=discrete_continuous_dist,
    )
    eval_policy = DreamerPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        exploration=False,
        expl_amount=0.0,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=discrete_continuous_dist,
    )

    rand_policy = ActionSpaceSamplePolicy(expl_env)

    expl_path_collector = VecMdpPathCollector(
        expl_env,
        expl_policy,
        save_env_in_snapshot=False,
    )

    eval_path_collector = VecMdpPathCollector(
        eval_env,
        eval_policy,
        save_env_in_snapshot=False,
    )

    variant["replay_buffer_kwargs"]["use_batch_length"] = use_batch_length
    replay_buffer = EpisodeReplayBuffer(
        num_expl_envs,
        obs_dim,
        action_dim,
        **variant["replay_buffer_kwargs"],
    )
    eval_filename = variant.get("eval_buffer_path", None)
    if eval_filename is not None:
        eval_buffer = EpisodeReplayBufferLowLevelRAPS(
            1000,
            expl_env,
            variant["algorithm_kwargs"]["max_path_length"],
            10,
            obs_dim,
            action_dim,
            9,
            replace=False,
        )
        eval_buffer.load_buffer(eval_filename, eval_env.envs[0].num_primitives)
    else:
        eval_buffer = None
    trainer = DreamerV2Trainer(
        actor,
        vf,
        target_vf,
        world_model,
        eval_envs[0].image_shape,
        **variant["trainer_kwargs"],
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        pretrain_policy=rand_policy,
        eval_buffer=eval_buffer,
        **variant["algorithm_kwargs"],
    )
    algorithm.low_level_primitives = False
    if variant.get("generate_video", False):
        post_epoch_visualize_func(algorithm, 0)
    else:
        if variant.get("save_video", False):
            algorithm.post_epoch_funcs.append(post_epoch_visualize_func)
        print("TRAINING")
        algorithm.to(ptu.device)
        algorithm.train()
        if variant.get("save_video", False):
            post_epoch_visualize_func(algorithm, -1)
Code example #5
def experiment(variant):
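    # Plan2Explore experiment: a shared world model with separate task and
    # exploration actors/value functions plus a one-step ensemble; trained with
    # TorchBatchRLAlgorithm, with videos rendered after each epoch.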
    import os

    import rlkit.envs.primitives_make_env as primitives_make_env

    os.environ["D4RL_SUPPRESS_IMPORT_ERROR"] = "1"
    import torch

    import rlkit.torch.pytorch_util as ptu
    from rlkit.envs.wrappers.mujoco_vec_wrappers import (
        DummyVecEnv,
        StableBaselinesVecEnv,
    )
    from rlkit.torch.model_based.dreamer.actor_models import ActorModel
    from rlkit.torch.model_based.dreamer.dreamer_policy import (
        ActionSpaceSamplePolicy,
        DreamerPolicy,
    )
    from rlkit.torch.model_based.dreamer.episode_replay_buffer import (
        EpisodeReplayBuffer, )
    from rlkit.torch.model_based.dreamer.mlp import Mlp
    from rlkit.torch.model_based.dreamer.path_collector import VecMdpPathCollector
    from rlkit.torch.model_based.dreamer.visualization import video_post_epoch_func
    from rlkit.torch.model_based.dreamer.world_models import WorldModel
    from rlkit.torch.model_based.plan2explore.latent_space_models import (
        OneStepEnsembleModel, )
    from rlkit.torch.model_based.plan2explore.plan2explore import Plan2ExploreTrainer
    from rlkit.torch.model_based.rl_algorithm import TorchBatchRLAlgorithm

    env_suite = variant.get("env_suite", "kitchen")
    env_name = variant["env_name"]
    env_kwargs = variant["env_kwargs"]
    use_raw_actions = variant["use_raw_actions"]
    num_expl_envs = variant["num_expl_envs"]
    actor_model_class_name = variant.get("actor_model_class", "actor_model")

    if num_expl_envs > 1:
        env_fns = [
            lambda: primitives_make_env.make_env(
                env_suite, env_name, env_kwargs) for _ in range(num_expl_envs)
        ]
        expl_env = StableBaselinesVecEnv(env_fns=env_fns, start_method="fork")
    else:
        expl_envs = [
            primitives_make_env.make_env(env_suite, env_name, env_kwargs)
        ]
        expl_env = DummyVecEnv(expl_envs,
                               pass_render_kwargs=variant.get(
                                   "pass_render_kwargs", False))
    eval_envs = [
        primitives_make_env.make_env(env_suite, env_name, env_kwargs)
        for _ in range(1)
    ]
    eval_env = DummyVecEnv(eval_envs,
                           pass_render_kwargs=variant.get(
                               "pass_render_kwargs", False))
    if use_raw_actions:
        discrete_continuous_dist = False
        continuous_action_dim = eval_env.action_space.low.size
        discrete_action_dim = 0
        use_batch_length = True
        action_dim = continuous_action_dim
    else:
        discrete_continuous_dist = variant["actor_kwargs"][
            "discrete_continuous_dist"]
        continuous_action_dim = eval_envs[0].max_arg_len
        discrete_action_dim = eval_envs[0].num_primitives
        if not discrete_continuous_dist:
            continuous_action_dim = continuous_action_dim + discrete_action_dim
            discrete_action_dim = 0
        action_dim = continuous_action_dim + discrete_action_dim
        use_batch_length = False
    world_model_class = WorldModel
    obs_dim = expl_env.observation_space.low.size
    actor_model_class = ActorModel
    if variant.get("load_from_path", False):
        data = torch.load(variant["models_path"])
        actor = data["trainer/actor"]
        vf = data["trainer/vf"]
        target_vf = data["trainer/target_vf"]
        world_model = data["trainer/world_model"]
    else:
        world_model = world_model_class(
            action_dim,
            image_shape=eval_envs[0].image_shape,
            **variant["model_kwargs"],
            env=eval_envs[0].env,
        )
        actor = actor_model_class(
            variant["model_kwargs"]["model_hidden_size"],
            world_model.feature_size,
            hidden_activation=torch.nn.functional.elu,
            discrete_action_dim=discrete_action_dim,
            continuous_action_dim=continuous_action_dim,
            env=eval_envs[0].env,
            **variant["actor_kwargs"],
        )
        vf = Mlp(
            hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
            variant["vf_kwargs"]["num_layers"],
            output_size=1,
            input_size=world_model.feature_size,
            hidden_activation=torch.nn.functional.elu,
        )
        target_vf = Mlp(
            hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
            variant["vf_kwargs"]["num_layers"],
            output_size=1,
            input_size=world_model.feature_size,
            hidden_activation=torch.nn.functional.elu,
        )

    one_step_ensemble = OneStepEnsembleModel(
        action_dim=action_dim,
        embedding_size=variant["model_kwargs"]["embedding_size"],
        deterministic_state_size=variant["model_kwargs"]
        ["deterministic_state_size"],
        stochastic_state_size=variant["model_kwargs"]["stochastic_state_size"],
        **variant["one_step_ensemble_kwargs"],
    )

    exploration_actor = actor_model_class(
        variant["model_kwargs"]["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        env=eval_envs[0],
        **variant["actor_kwargs"],
    )
    exploration_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
    )
    exploration_target_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
    )

    if variant.get("expl_with_exploration_actor", True):
        expl_actor = exploration_actor
    else:
        expl_actor = actor
    expl_policy = DreamerPolicy(
        world_model,
        expl_actor,
        obs_dim,
        action_dim,
        exploration=True,
        expl_amount=variant.get("expl_amount", 0.3),
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=variant["actor_kwargs"]
        ["discrete_continuous_dist"],
    )
    if variant.get("eval_with_exploration_actor", False):
        eval_actor = exploration_actor
    else:
        eval_actor = actor
    eval_policy = DreamerPolicy(
        world_model,
        eval_actor,
        obs_dim,
        action_dim,
        exploration=False,
        expl_amount=0.0,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=variant["actor_kwargs"]
        ["discrete_continuous_dist"],
    )

    rand_policy = ActionSpaceSamplePolicy(expl_env)

    expl_path_collector = VecMdpPathCollector(
        expl_env,
        expl_policy,
        save_env_in_snapshot=False,
    )

    eval_path_collector = VecMdpPathCollector(
        eval_env,
        eval_policy,
        save_env_in_snapshot=False,
    )

    replay_buffer = EpisodeReplayBuffer(
        variant["replay_buffer_size"],
        expl_env,
        variant["algorithm_kwargs"]["max_path_length"] + 1,
        obs_dim,
        action_dim,
        replace=False,
        use_batch_length=use_batch_length,
    )
    trainer = Plan2ExploreTrainer(
        eval_env,
        actor,
        vf,
        target_vf,
        world_model,
        eval_envs[0].image_shape,
        exploration_actor,
        exploration_vf,
        exploration_target_vf,
        one_step_ensemble,
        **variant["trainer_kwargs"],
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        pretrain_policy=rand_policy,
        **variant["algorithm_kwargs"],
    )

    algorithm.post_epoch_funcs.append(video_post_epoch_func)
    algorithm.to(ptu.device)
    algorithm.train()
    video_post_epoch_func(algorithm, -1)
Code example #6
def experiment(variant):
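    # Dreamer/DreamerV2 on DeepMind Control: wrap the env with action repeat,
    # action normalization, and a 500-step time limit, then train the trainer
    # class selected by variant["algorithm"] (DreamerV2 by default).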
    import os

    from rlkit.envs.wrappers.dmc_wrappers import (
        ActionRepeat,
        DeepMindControl,
        NormalizeActions,
        TimeLimit,
    )
    from rlkit.torch.model_based.dreamer.visualization import post_epoch_visualize_func

    os.environ["D4RL_SUPPRESS_IMPORT_ERROR"] = "1"
    import torch
    import torch.nn as nn

    import rlkit.torch.pytorch_util as ptu
    from rlkit.envs.wrappers.mujoco_vec_wrappers import DummyVecEnv
    from rlkit.torch.model_based.dreamer.actor_models import ActorModel
    from rlkit.torch.model_based.dreamer.dreamer import DreamerTrainer
    from rlkit.torch.model_based.dreamer.dreamer_policy import (
        ActionSpaceSamplePolicy,
        DreamerPolicy,
    )
    from rlkit.torch.model_based.dreamer.dreamer_v2 import DreamerV2Trainer
    from rlkit.torch.model_based.dreamer.episode_replay_buffer import (
        EpisodeReplayBuffer,
    )
    from rlkit.torch.model_based.dreamer.mlp import Mlp
    from rlkit.torch.model_based.dreamer.path_collector import VecMdpPathCollector
    from rlkit.torch.model_based.dreamer.world_models import WorldModel
    from rlkit.torch.model_based.rl_algorithm import TorchBatchRLAlgorithm

    expl_env = DeepMindControl(variant["env_id"])
    expl_env.reset()
    expl_env = ActionRepeat(expl_env, 2)
    expl_env = NormalizeActions(expl_env)
    expl_env = DummyVecEnv([TimeLimit(expl_env, 500)], pass_render_kwargs=False)

    eval_env = DeepMindControl(variant["env_id"])
    eval_env.reset()
    eval_env = ActionRepeat(eval_env, 2)
    eval_env = NormalizeActions(eval_env)
    eval_env = DummyVecEnv([TimeLimit(eval_env, 500)], pass_render_kwargs=False)

    obs_dim = expl_env.observation_space.low.size
    action_dim = expl_env.action_space.low.size

    world_model_class = WorldModel

    world_model = world_model_class(
        action_dim,
        image_shape=(3, 64, 64),
        **variant["model_kwargs"],
    )
    actor = ActorModel(
        variant["model_kwargs"]["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=nn.ELU,
        discrete_action_dim=0,
        continuous_action_dim=eval_env.action_space.low.size,
        **variant["actor_kwargs"],
    )
    vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]]
        * variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )
    target_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]]
        * variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )
    variant["trainer_kwargs"]["target_vf"] = target_vf

    expl_policy = DreamerPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        exploration=True,
        expl_amount=variant.get("expl_amount", 0.3),
        discrete_action_dim=0,
        continuous_action_dim=eval_env.action_space.low.size,
    )
    eval_policy = DreamerPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        exploration=False,
        expl_amount=0.0,
        discrete_action_dim=0,
        continuous_action_dim=eval_env.action_space.low.size,
    )

    rand_policy = ActionSpaceSamplePolicy(expl_env)

    expl_path_collector = VecMdpPathCollector(
        expl_env,
        expl_policy,
        save_env_in_snapshot=False,
        env_params={},
        env_class={},
    )

    eval_path_collector = VecMdpPathCollector(
        eval_env,
        eval_policy,
        save_env_in_snapshot=False,
        env_params={},
        env_class={},
    )

    replay_buffer = EpisodeReplayBuffer(
        1,
        obs_dim,
        action_dim,
        variant["replay_buffer_size"],
        500,
        replace=False,
        use_batch_length=True,
        batch_length=50,
    )
    trainer_class_name = variant.get("algorithm", "DreamerV2")
    if trainer_class_name == "DreamerV2":
        trainer_class = DreamerV2Trainer
    else:
        trainer_class = DreamerTrainer
    trainer = trainer_class(
        world_model=world_model,
        actor=actor,
        vf=vf,
        image_shape=(3, 64, 64),
        **variant["trainer_kwargs"],
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        pretrain_policy=rand_policy,
        **variant["algorithm_kwargs"],
    )
    print("TRAINING")
    algorithm.to(ptu.device)
    algorithm.train()
Code example #7
# Imports assumed for this excerpt (rlkit module paths as used in the other
# examples on this page):
import tempfile

import torch.nn as nn

from rlkit.envs.primitives_make_env import make_env
from rlkit.envs.wrappers.mujoco_vec_wrappers import DummyVecEnv
from rlkit.torch.model_based.dreamer.actor_models import ActorModel
from rlkit.torch.model_based.dreamer.dreamer_v2 import DreamerV2Trainer
from rlkit.torch.model_based.dreamer.mlp import Mlp
from rlkit.torch.model_based.dreamer.world_models import WorldModel


def test_trainer_save_load():
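    # Build a DreamerV2 trainer (world model, actor, value functions) for a single
    # metaworld env and round-trip it through save()/load() in a temporary directory.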
    env_kwargs = dict(
        use_image_obs=True,
        imwidth=64,
        imheight=64,
        reward_type="sparse",
        usage_kwargs=dict(
            max_path_length=5,
            use_dm_backend=True,
            use_raw_action_wrappers=False,
            unflatten_images=False,
        ),
        action_space_kwargs=dict(
            control_mode="primitives",
            action_scale=1,
            camera_settings={
                "distance": 0.38227044687537043,
                "lookat": [0.21052547, 0.32329237, 0.587819],
                "azimuth": 141.328125,
                "elevation": -53.203125160653144,
            },
        ),
    )
    actor_kwargs = dict(
        discrete_continuous_dist=True,
        init_std=0.0,
        num_layers=4,
        min_std=0.1,
        dist="tanh_normal_dreamer_v1",
    )
    vf_kwargs = dict(num_layers=3, )
    model_kwargs = dict(
        model_hidden_size=400,
        stochastic_state_size=50,
        deterministic_state_size=200,
        rssm_hidden_size=200,
        reward_num_layers=2,
        pred_discount_num_layers=3,
        gru_layer_norm=True,
        std_act="sigmoid2",
        use_prior_instead_of_posterior=False,
    )
    trainer_kwargs = dict(
        adam_eps=1e-5,
        discount=0.8,
        lam=0.95,
        forward_kl=False,
        free_nats=1.0,
        pred_discount_loss_scale=10.0,
        kl_loss_scale=0.0,
        transition_loss_scale=0.8,
        actor_lr=8e-5,
        vf_lr=8e-5,
        world_model_lr=3e-4,
        reward_loss_scale=2.0,
        use_pred_discount=True,
        policy_gradient_loss_scale=1.0,
        actor_entropy_loss_schedule="1e-4",
        target_update_period=100,
        detach_rewards=False,
        imagination_horizon=5,
    )
    env_suite = "metaworld"
    env_name = "disassemble-v2"
    eval_envs = [make_env(env_suite, env_name, env_kwargs) for _ in range(1)]
    eval_env = DummyVecEnv(eval_envs, )

    discrete_continuous_dist = True
    continuous_action_dim = eval_envs[0].max_arg_len
    discrete_action_dim = eval_envs[0].num_primitives
    if not discrete_continuous_dist:
        continuous_action_dim = continuous_action_dim + discrete_action_dim
        discrete_action_dim = 0
    action_dim = continuous_action_dim + discrete_action_dim

    world_model = WorldModel(
        action_dim,
        image_shape=eval_envs[0].image_shape,
        **model_kwargs,
    )
    actor = ActorModel(
        model_kwargs["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=nn.ELU,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        **actor_kwargs,
    )

    vf = Mlp(
        hidden_sizes=[model_kwargs["model_hidden_size"]] *
        vf_kwargs["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )
    target_vf = Mlp(
        hidden_sizes=[model_kwargs["model_hidden_size"]] *
        vf_kwargs["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )

    trainer = DreamerV2Trainer(
        actor,
        vf,
        target_vf,
        world_model,
        eval_envs[0].image_shape,
        **trainer_kwargs,
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        trainer.save(tmpdirname, "trainer.pkl")
        trainer = DreamerV2Trainer(
            actor,
            vf,
            target_vf,
            world_model,
            eval_envs[0].image_shape,
            **trainer_kwargs,
        )
        new_trainer = trainer.load(tmpdirname, "trainer.pkl")
Code example #8
File: ll_raps_experiment.py  Project: mihdalal/rlkit
def experiment(variant):
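    # Low-level RAPS experiment: a LowlevelRAPSWorldModel with an Mlp primitive
    # model predicts low-level actions, and DreamerLowLevelRAPSPolicy is trained
    # with DreamerV2LowLevelRAPSTrainer; replay/eval buffers can be preloaded from
    # disk, and video or unsubsampled-rollout visualization can replace training.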
    import os
    import os.path as osp

    os.environ["D4RL_SUPPRESS_IMPORT_ERROR"] = "1"

    import torch
    import torch.nn as nn

    import rlkit.torch.pytorch_util as ptu
    from rlkit.core import logger
    from rlkit.envs.primitives_make_env import make_env
    from rlkit.envs.wrappers.mujoco_vec_wrappers import (
        DummyVecEnv,
        StableBaselinesVecEnv,
    )
    from rlkit.torch.model_based.dreamer.actor_models import ActorModel
    from rlkit.torch.model_based.dreamer.dreamer_policy import (
        ActionSpaceSamplePolicy,
        DreamerLowLevelRAPSPolicy,
    )
    from rlkit.torch.model_based.dreamer.dreamer_v2 import DreamerV2LowLevelRAPSTrainer
    from rlkit.torch.model_based.dreamer.episode_replay_buffer import (
        EpisodeReplayBufferLowLevelRAPS, )
    from rlkit.torch.model_based.dreamer.mlp import Mlp
    from rlkit.torch.model_based.dreamer.path_collector import VecMdpPathCollector
    from rlkit.torch.model_based.dreamer.rollout_functions import (
        vec_rollout_low_level_raps, )
    from rlkit.torch.model_based.dreamer.visualization import (
        post_epoch_visualize_func,
        visualize_primitive_unsubsampled_rollout,
    )
    from rlkit.torch.model_based.dreamer.world_models import LowlevelRAPSWorldModel
    from rlkit.torch.model_based.rl_algorithm import TorchBatchRLAlgorithm

    env_suite = variant.get("env_suite", "kitchen")
    env_kwargs = variant["env_kwargs"]
    num_expl_envs = variant["num_expl_envs"]
    num_low_level_actions_per_primitive = variant[
        "num_low_level_actions_per_primitive"]
    low_level_action_dim = variant["low_level_action_dim"]

    print("MAKING ENVS")
    env_name = variant["env_name"]
    make_env_lambda = lambda: make_env(env_suite, env_name, env_kwargs)

    if num_expl_envs > 1:
        env_fns = [make_env_lambda for _ in range(num_expl_envs)]
        expl_env = StableBaselinesVecEnv(
            env_fns=env_fns,
            start_method="fork",
            reload_state_args=(
                num_expl_envs,
                make_env,
                (env_suite, env_name, env_kwargs),
            ),
        )
    else:
        expl_envs = [make_env_lambda()]
        expl_env = DummyVecEnv(expl_envs,
                               pass_render_kwargs=variant.get(
                                   "pass_render_kwargs", False))
    eval_envs = [make_env_lambda() for _ in range(1)]
    eval_env = DummyVecEnv(eval_envs,
                           pass_render_kwargs=variant.get(
                               "pass_render_kwargs", False))

    discrete_continuous_dist = variant["actor_kwargs"][
        "discrete_continuous_dist"]
    num_primitives = eval_envs[0].num_primitives
    continuous_action_dim = eval_envs[0].max_arg_len
    discrete_action_dim = num_primitives
    if not discrete_continuous_dist:
        continuous_action_dim = continuous_action_dim + discrete_action_dim
        discrete_action_dim = 0
    action_dim = continuous_action_dim + discrete_action_dim
    obs_dim = expl_env.observation_space.low.size

    primitive_model = Mlp(
        output_size=variant["low_level_action_dim"],
        input_size=variant["model_kwargs"]["stochastic_state_size"] +
        variant["model_kwargs"]["deterministic_state_size"] +
        eval_env.envs[0].action_space.low.shape[0] + 1,
        hidden_activation=nn.ReLU,
        num_embeddings=eval_envs[0].num_primitives,
        embedding_dim=eval_envs[0].num_primitives,
        embedding_slice=eval_envs[0].num_primitives,
        **variant["primitive_model_kwargs"],
    )
    world_model = LowlevelRAPSWorldModel(
        low_level_action_dim,
        image_shape=eval_envs[0].image_shape,
        primitive_model=primitive_model,
        **variant["model_kwargs"],
    )
    actor = ActorModel(
        variant["model_kwargs"]["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=nn.ELU,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        **variant["actor_kwargs"],
    )
    vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )
    target_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=nn.ELU,
    )

    if variant.get("models_path", None) is not None:
        filename = variant["models_path"]
        actor.load_state_dict(torch.load(osp.join(filename, "actor.ptc")))
        vf.load_state_dict(torch.load(osp.join(filename, "vf.ptc")))
        target_vf.load_state_dict(
            torch.load(osp.join(filename, "target_vf.ptc")))
        world_model.load_state_dict(
            torch.load(osp.join(filename, "world_model.ptc")))
        print("LOADED MODELS")

    expl_policy = DreamerLowLevelRAPSPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        num_low_level_actions_per_primitive=num_low_level_actions_per_primitive,
        low_level_action_dim=low_level_action_dim,
        exploration=True,
        expl_amount=variant.get("expl_amount", 0.3),
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=discrete_continuous_dist,
    )
    eval_policy = DreamerLowLevelRAPSPolicy(
        world_model,
        actor,
        obs_dim,
        action_dim,
        num_low_level_actions_per_primitive=num_low_level_actions_per_primitive,
        low_level_action_dim=low_level_action_dim,
        exploration=False,
        expl_amount=0.0,
        discrete_action_dim=discrete_action_dim,
        continuous_action_dim=continuous_action_dim,
        discrete_continuous_dist=discrete_continuous_dist,
    )

    initial_data_collection_policy = ActionSpaceSamplePolicy(expl_env)

    rollout_function_kwargs = dict(
        num_low_level_actions_per_primitive=num_low_level_actions_per_primitive,
        low_level_action_dim=low_level_action_dim,
        num_primitives=num_primitives,
    )

    expl_path_collector = VecMdpPathCollector(
        expl_env,
        expl_policy,
        save_env_in_snapshot=False,
        rollout_fn=vec_rollout_low_level_raps,
        rollout_function_kwargs=rollout_function_kwargs,
    )

    eval_path_collector = VecMdpPathCollector(
        eval_env,
        eval_policy,
        save_env_in_snapshot=False,
        rollout_fn=vec_rollout_low_level_raps,
        rollout_function_kwargs=rollout_function_kwargs,
    )

    replay_buffer = EpisodeReplayBufferLowLevelRAPS(
        num_expl_envs, obs_dim, action_dim, **variant["replay_buffer_kwargs"])
    filename = variant.get("replay_buffer_path", None)
    if filename is not None:
        replay_buffer.load_buffer(filename, eval_env.envs[0].num_primitives)
    eval_filename = variant.get("eval_buffer_path", None)
    if eval_filename is not None:
        eval_buffer = EpisodeReplayBufferLowLevelRAPS(
            1000,
            expl_env,
            variant["algorithm_kwargs"]["max_path_length"],
            num_low_level_actions_per_primitive,
            obs_dim,
            action_dim,
            low_level_action_dim,
            replace=False,
        )
        eval_buffer.load_buffer(eval_filename, eval_env.envs[0].num_primitives)
    else:
        eval_buffer = None

    trainer = DreamerV2LowLevelRAPSTrainer(
        actor,
        vf,
        target_vf,
        world_model,
        eval_envs[0].image_shape,
        **variant["trainer_kwargs"],
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        pretrain_policy=initial_data_collection_policy,
        **variant["algorithm_kwargs"],
        eval_buffer=eval_buffer,
    )
    algorithm.low_level_primitives = True
    if variant.get("generate_video", False):
        post_epoch_visualize_func(algorithm, 0)
    elif variant.get("unsubsampled_rollout", False):
        visualize_primitive_unsubsampled_rollout(
            make_env_lambda(),
            make_env_lambda(),
            make_env_lambda(),
            logger.get_snapshot_dir(),
            algorithm.max_path_length,
            num_low_level_actions_per_primitive,
            policy=eval_policy,
            img_size=64,
            num_rollouts=4,
        )
    else:
        if variant.get("save_video", False):
            algorithm.post_epoch_funcs.append(post_epoch_visualize_func)
        print("TRAINING")
        algorithm.to(ptu.device)
        algorithm.train()
        if variant.get("save_video", False):
            post_epoch_visualize_func(algorithm, -1)
Code example #9
def experiment(variant):
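    # Plan2Explore on DeepMind Control: same task/exploration actor split and
    # one-step ensemble as the Plan2Explore example above, but with DMC wrappers
    # (action repeat, action normalization, a 500-step time limit) and a
    # (3, 64, 64) image world model.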
    import os

    os.environ["D4RL_SUPPRESS_IMPORT_ERROR"] = "1"
    import torch

    import rlkit.torch.pytorch_util as ptu
    from rlkit.envs.wrappers.dmc_wrappers import (
        ActionRepeat,
        DeepMindControl,
        NormalizeActions,
        TimeLimit,
    )
    from rlkit.envs.wrappers.mujoco_vec_wrappers import DummyVecEnv
    from rlkit.torch.model_based.dreamer.actor_models import ActorModel
    from rlkit.torch.model_based.dreamer.dreamer_policy import (
        ActionSpaceSamplePolicy,
        DreamerPolicy,
    )
    from rlkit.torch.model_based.dreamer.episode_replay_buffer import (
        EpisodeReplayBuffer, )
    from rlkit.torch.model_based.dreamer.mlp import Mlp
    from rlkit.torch.model_based.dreamer.path_collector import VecMdpPathCollector
    from rlkit.torch.model_based.dreamer.world_models import WorldModel
    from rlkit.torch.model_based.plan2explore.latent_space_models import (
        OneStepEnsembleModel, )
    from rlkit.torch.model_based.plan2explore.plan2explore import Plan2ExploreTrainer
    from rlkit.torch.model_based.rl_algorithm import TorchBatchRLAlgorithm

    expl_env = DeepMindControl(variant["env_id"])
    expl_env.reset()
    expl_env = ActionRepeat(expl_env, 2)
    expl_env = NormalizeActions(expl_env)
    expl_env = DummyVecEnv([TimeLimit(expl_env, 500)],
                           pass_render_kwargs=False)

    eval_env = DeepMindControl(variant["env_id"])
    eval_env.reset()
    eval_env = ActionRepeat(eval_env, 2)
    eval_env = NormalizeActions(eval_env)
    eval_env = DummyVecEnv([TimeLimit(eval_env, 500)],
                           pass_render_kwargs=False)

    obs_dim = expl_env.observation_space.low.size
    action_dim = expl_env.action_space.low.size

    world_model_class = WorldModel

    world_model = world_model_class(
        action_dim,
        image_shape=(3, 64, 64),
        **variant["model_kwargs"],
        env=eval_env,
    )
    actor_model_class = ActorModel
    eval_actor = actor_model_class(
        variant["model_kwargs"]["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
        discrete_action_dim=0,
        continuous_action_dim=eval_env.action_space.low.size,
        env=eval_env,
        **variant["actor_kwargs"],
    )
    vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
    )
    target_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] *
        variant["vf_kwargs"]["num_layers"],
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
    )
    variant["trainer_kwargs"]["target_vf"] = target_vf

    one_step_ensemble = OneStepEnsembleModel(
        action_dim=action_dim,
        embedding_size=variant["model_kwargs"]["embedding_size"],
        deterministic_state_size=variant["model_kwargs"]
        ["deterministic_state_size"],
        stochastic_state_size=variant["model_kwargs"]["stochastic_state_size"],
        **variant["one_step_ensemble_kwargs"],
    )

    exploration_actor = actor_model_class(
        variant["model_kwargs"]["model_hidden_size"],
        world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
        discrete_action_dim=0,
        continuous_action_dim=eval_env.action_space.low.size,
        env=eval_env,
        **variant["actor_kwargs"],
    )
    exploration_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] * 3,
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
    )
    exploration_target_vf = Mlp(
        hidden_sizes=[variant["model_kwargs"]["model_hidden_size"]] * 3,
        output_size=1,
        input_size=world_model.feature_size,
        hidden_activation=torch.nn.functional.elu,
    )
    variant["trainer_kwargs"]["exploration_target_vf"] = exploration_target_vf

    expl_policy = DreamerPolicy(
        world_model,
        exploration_actor,
        obs_dim,
        action_dim,
        exploration=True,
        expl_amount=variant.get("expl_amount", 0.3),
        discrete_action_dim=0,
        continuous_action_dim=eval_env.action_space.low.size,
    )
    eval_policy = DreamerPolicy(
        world_model,
        eval_actor,
        obs_dim,
        action_dim,
        exploration=False,
        expl_amount=0.0,
        discrete_action_dim=0,
        continuous_action_dim=eval_env.action_space.low.size,
    )

    rand_policy = ActionSpaceSamplePolicy(expl_env)

    expl_path_collector = VecMdpPathCollector(
        expl_env,
        expl_policy,
        save_env_in_snapshot=False,
        env_params={},
        env_class={},
    )

    eval_path_collector = VecMdpPathCollector(
        eval_env,
        eval_policy,
        save_env_in_snapshot=False,
        env_params={},
        env_class={},
    )

    replay_buffer = EpisodeReplayBuffer(
        variant["replay_buffer_size"],
        expl_env,
        501,
        obs_dim,
        action_dim,
        replace=False,
        use_batch_length=True,
        batch_length=50,
    )
    trainer = Plan2ExploreTrainer(
        env=eval_env,
        world_model=world_model,
        actor=eval_actor,
        vf=vf,
        image_shape=(3, 64, 64),
        one_step_ensemble=one_step_ensemble,
        exploration_actor=exploration_actor,
        exploration_vf=exploration_vf,
        **variant["trainer_kwargs"],
    )
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        pretrain_policy=rand_policy,
        **variant["algorithm_kwargs"],
    )
    algorithm.to(ptu.device)
    algorithm.train()