Example #1
    def __init__(self, session, agents=None, **params):
        self.agents = agents
        self.params = params
        self.session = session

        # Create environments
        extra_env_kwargs = self.params.get("extra_env_kwargs", {})
        menv = MakeEnvironment(session,
                               mode=self.params["mode"],
                               seed=self.params["seed"],
                               **self.params["env"])
        self.envs = menv.create_envs(num_workers=self.params["num_workers"],
                                     extra_env_kwargs=extra_env_kwargs)

        # self.params["env"]["env_type"]

        self.state = {}
        self.state["steps"] = 0
        self.state["n_episode"] = 0
        self.state["timesteps"] = 0
        self.state["was_reset"] = False

        self.local = {}
        self.local["steps"] = 0
        self.local["n_episode"] = 0

        self.monitor_n_episode()
        self.monitor_timesteps()

        # We only reset once. Later environments will be reset automatically.
        self.reset()
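
A minimal usage sketch follows; it assumes the constructor above belongs to an explorer-like class (here called Explorer, a name not given in the snippet) and that its keyword arguments come from one of the per-mode explorer dicts produced by the gen_params functions in the later examples, which carry exactly the keys this constructor reads ("mode", "seed", "env", "num_workers", "extra_env_kwargs").

# Hedged sketch: "Explorer", "session", and "agents" are illustrative stand-ins.
explorer_params = params["explorer"]["train"]   # carries "mode", "seed", "env",
                                                # "num_workers", "extra_env_kwargs"
explorer = Explorer(session, agents=agents, **explorer_params)
print(explorer.state["steps"], explorer.state["n_episode"])  # bookkeeping counters are initialized to 0 in __init__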
Example #2
def gen_params(cpanel):
    params = {}
    # Environment
    params["env"] = {}
    params["env"]["name"] = cpanel["model_name"]

    params["env"]["from_module"] = cpanel.get("from_module", '')
    params["env"]["from_params"] = cpanel.get("from_params", False)

    if params["env"]["from_params"]:
        # For having environment from parameters
        from digideep.environment.dmc2gym.registration import EnvCreator
        from dextron.zoo.hand_env.hand import grasp

        task_kwargs = {
            "generator_type": cpanel["generator_type"],  # Algorithm for generating trajectory: simulated/real
            "generator_args": {
                "time_scale_offset": cpanel["time_scale_offset"],
                "time_scale_factor": cpanel["time_scale_factor"],
                "time_noise_factor": cpanel["time_noise_factor"],
                "time_staying_more": cpanel["time_staying_more"],  # timesteps
                "extracts_path": cpanel["extracts_path"],
                "database_filename": cpanel["database_filename"]
            },
            "random": None,
            "pub_cameras": PUB_CAMERAS,
            "reward_type": "reward/20",
            "controller_gain": cpanel["controller_gain"]
        }

        # visualize_reward=True
        environment_kwargs = {
            "time_limit": cpanel["time_limit"],
            "control_timestep": cpanel["control_timestep"]
        }
        params["env"]["register_args"] = {
            "id": cpanel["model_name"],
            "entry_point":
            "digideep.environment.dmc2gym.wrapper:DmControlWrapper",
            "kwargs": {
                'dmcenv_creator':
                EnvCreator(grasp,
                           task_kwargs=task_kwargs,
                           environment_kwargs=environment_kwargs,
                           visualize_reward=True),
                'flat_observation':
                False,
                'observation_key':
                "agent"
            }
        }

    ##############################################
    ### Normal Wrappers ###
    #######################
    norm_wrappers = []

    # Converting observation to 1 level
    # if not PUB_CAMERAS:
    #     norm_wrappers.append(dict(name="digideep.environment.wrappers.normal.WrapperLevelDictObs",
    #                             args={"path":cpanel["observation_key"],
    #                             },
    #                             enabled=True))

    # Normalizing actions (to be in [-1, 1])
    norm_wrappers.append(
        dict(name="digideep.environment.wrappers.normalizers.WrapperNormalizeActDict",
             args={"paths": ["agent"]},
             enabled=False))

    ##############################################
    ### Vector Wrappers ###
    #######################
    vect_wrappers = []

    # Normalizing rewards
    vect_wrappers.append(
        dict(
            name="digideep.environment.wrappers.normalizers.VecNormalizeRew",
            args={
                "clip": 5,  # 10
                "gamma": cpanel["gamma"],
                "epsilon": 1e-8
            },
            enabled=False))  # Not a good idea to normalize sparse rewards.

    # Log successful parameter sets for the expert policy
    vect_wrappers.append(
        dict(name="dextron.wrappers.success_logger.VecSuccessLogger",
             request_for_args=["session_state"],
             args={
                 "threshold": cpanel["reward_threshold"],  # Remove only zero-reward episodes
                 "interval": 100,  # Number of episodes between log reports
                 "num_workers": cpanel["num_workers"],
                 "info_keys": ["/rand"],
                 "obs_keys": ["/parameters"]
             },
             enabled=True))

    ##############################################
    params["env"]["main_wrappers"] = {
        "Monitor": {
            "allow_early_resets":
            True,  # We need it to allow early resets in the test environment.
            "reset_keywords": (),
            "info_keywords": ()
        },
        "WrapperDummyMultiAgent": {
            "agent_name": "agent"
        },
        "WrapperDummyDictObs": {
            "observation_key": "agent"
        }
    }
    params["env"]["norm_wrappers"] = norm_wrappers
    params["env"]["vect_wrappers"] = vect_wrappers

    menv = MakeEnvironment(session=None, mode=None, seed=1, **params["env"])
    params["env"]["config"] = menv.get_config()

    #####################################
    # Runner: [episode < cycle < epoch] #
    #####################################
    params["runner"] = {}
    params["runner"]["name"] = cpanel.get("runner_name",
                                          "digideep.pipeline.Runner")
    params["runner"]["max_time"] = cpanel.get("max_exec_time", None)
    params["runner"]["max_iter"] = cpanel.get("max_exec_iter", None)
    params["runner"]["n_cycles"] = cpanel[
        "epoch_size"]  # Meaning that 100 cycles are 1 epoch.
    params["runner"]["n_epochs"] = cpanel[
        "number_epochs"]  # Testing and savings are done after each epoch.
    params["runner"]["randargs"] = {
        'seed': cpanel["seed"],
        'cuda_deterministic': cpanel["cuda_deterministic"]
    }
    params["runner"]["test_act"] = cpanel["test_activate"]  # Test Activate
    params["runner"]["test_int"] = cpanel["test_interval"]  # Test Interval
    params["runner"]["save_int"] = cpanel["save_interval"]  # Save Interval

    params["agents"] = {}
    ##############################################
    ### Agent (#1) ### Demonstrator
    ##################
    params["agents"]["demonstrator"] = {}
    params["agents"]["demonstrator"]["name"] = "demonstrator"
    params["agents"]["demonstrator"][
        "type"] = "dextron.agent.demonstrator.NaiveController"
    params["agents"]["demonstrator"]["methodargs"] = {}
    agent_name = params["agents"]["demonstrator"]["name"]
    params["agents"]["demonstrator"]["methodargs"]["act_space"] = params[
        "env"]["config"]["action_space"][agent_name]
    ##############################################

    # ##############################################
    # ### Memory ###
    # ##############
    params["memory"] = {}

    ##############################################
    ### Explorer ###
    ################
    params["explorer"] = {}

    params["explorer"]["train"] = {}
    params["explorer"]["train"]["mode"] = "train"
    params["explorer"]["train"]["env"] = params["env"]
    params["explorer"]["train"]["do_reset"] = False
    params["explorer"]["train"]["final_action"] = False
    params["explorer"]["train"]["warm_start"] = 0
    params["explorer"]["train"]["num_workers"] = cpanel["num_workers"]
    params["explorer"]["train"][
        "deterministic"] = False  # MUST: Takes random actions
    params["explorer"]["train"]["n_steps"] = cpanel[
        "n_steps"]  # Number of steps to take a step in the environment
    params["explorer"]["train"][
        "n_episodes"] = None  # Do not limit # of episodes
    params["explorer"]["train"][
        "win_size"] = 20  # Number of episodes to episode reward for report
    params["explorer"]["train"]["render"] = False
    params["explorer"]["train"]["render_delay"] = 0
    params["explorer"]["train"]["seed"] = cpanel["seed"] + 90
    params["explorer"]["train"]["extra_env_kwargs"] = {
        "mode": params["explorer"]["train"]["mode"],
        "allow_demos": False
    }

    params["explorer"]["test"] = {}
    params["explorer"]["test"]["mode"] = "test"
    params["explorer"]["test"]["env"] = params["env"]
    params["explorer"]["test"]["do_reset"] = True
    params["explorer"]["test"]["final_action"] = False
    params["explorer"]["test"]["warm_start"] = 0
    params["explorer"]["test"]["num_workers"] = cpanel[
        "num_workers"]  # We can use the same amount of workers for testing!
    params["explorer"]["test"][
        "deterministic"] = True  # MUST: Takes the best action
    params["explorer"]["test"]["n_steps"] = None  # Do not limit # of steps
    params["explorer"]["test"]["n_episodes"] = cpanel["test_win_size"]
    params["explorer"]["test"]["win_size"] = cpanel[
        "test_win_size"]  # Extra episodes won't be counted
    params["explorer"]["test"]["render"] = False
    params["explorer"]["test"]["render_delay"] = 0
    params["explorer"]["test"]["seed"] = cpanel[
        "seed"] + 100  # We want to make the seed of test environments different from training.
    params["explorer"]["test"]["extra_env_kwargs"] = {
        "mode": params["explorer"]["test"]["mode"],
        "allow_demos": False
    }

    params["explorer"]["eval"] = {}
    params["explorer"]["eval"]["mode"] = "eval"
    params["explorer"]["eval"]["env"] = params["env"]
    params["explorer"]["eval"]["do_reset"] = False
    params["explorer"]["eval"]["final_action"] = False
    params["explorer"]["eval"]["warm_start"] = 0
    params["explorer"]["eval"]["num_workers"] = 1
    params["explorer"]["eval"][
        "deterministic"] = True  # MUST: Takes the best action
    params["explorer"]["eval"]["n_steps"] = None  # Do not limit # of steps
    params["explorer"]["eval"]["n_episodes"] = 1
    params["explorer"]["eval"]["win_size"] = -1
    params["explorer"]["eval"]["render"] = True
    params["explorer"]["eval"]["render_delay"] = 0
    params["explorer"]["eval"]["seed"] = cpanel[
        "seed"] + 101  # We want to make the seed of eval environment different from test/train.
    params["explorer"]["eval"]["extra_env_kwargs"] = {
        "mode": params["explorer"]["eval"]["mode"],
        "allow_demos": cpanel.get("allow_demos", False)
    }
    ##############################################

    params["explorer"]["demo"] = {}
    params["explorer"]["demo"]["mode"] = "demo"
    params["explorer"]["demo"]["env"] = params["env"]
    params["explorer"]["demo"]["do_reset"] = False
    params["explorer"]["demo"]["final_action"] = False
    params["explorer"]["demo"]["warm_start"] = 0
    params["explorer"]["demo"]["num_workers"] = cpanel["num_workers"]
    params["explorer"]["demo"][
        "deterministic"] = False  # MUST: Takes random actions
    params["explorer"]["demo"]["n_steps"] = cpanel[
        "n_steps"]  # Number of steps to take a step in the environment
    params["explorer"]["demo"]["n_episodes"] = None
    params["explorer"]["demo"]["win_size"] = -1
    params["explorer"]["demo"]["render"] = cpanel["render"]
    params["explorer"]["demo"]["render_delay"] = 0
    params["explorer"]["demo"]["seed"] = cpanel["seed"] + 50
    params["explorer"]["demo"]["extra_env_kwargs"] = {
        "mode": params["explorer"]["demo"]["mode"],
        "allow_demos": True
    }

    return params
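
For reference, a hedged sketch of a control panel ("cpanel") covering the keys this gen_params reads when "from_params" is False. All values below are illustrative placeholders rather than defaults from the source, and actually calling gen_params(cpanel) additionally requires MakeEnvironment to be importable and an environment registered under cpanel["model_name"].

cpanel = {
    "model_name": "CustomHandGrasp-v0",   # hypothetical registered environment id
    "from_module": '',
    "from_params": False,
    "gamma": 0.99,
    "reward_threshold": 0.0,
    "num_workers": 4,
    "seed": 0,
    "cuda_deterministic": False,
    "runner_name": "digideep.pipeline.Runner",
    "max_exec_time": None,
    "max_exec_iter": None,
    "epoch_size": 100,        # cycles per epoch
    "number_epochs": 1000,
    "test_activate": False,
    "test_interval": 10,
    "save_interval": 10,
    "n_steps": 1,
    "test_win_size": 10,
    "render": False,
    # "allow_demos" is read with cpanel.get(...) and may be omitted.
}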
Example #3
def gen_params(cpanel):
    params = {}
    # Environment
    params["env"] = {}
    params["env"]["name"]   = cpanel["model_name"]
    
    params["env"]["from_module"] = cpanel.get("from_module", '')
    params["env"]["from_params"] = cpanel.get("from_params", False)

    
    ##############################################
    ### Normal Wrappers ###
    #######################
    norm_wrappers = []

    # Converting observation to 1 level
    norm_wrappers.append(dict(name="digideep.environment.wrappers.normal.WrapperLevelDictObs",
                              args={"path":cpanel["observation_key"],
                              },
                              enabled=False))
    # Normalizing actions (to be in [-1, 1])
    norm_wrappers.append(dict(name="digideep.environment.wrappers.normalizers.WrapperNormalizeActDict",
                              args={"paths":["agent"]},
                              enabled=True))

    ##############################################
    ### Vector Wrappers ###
    #######################
    vect_wrappers = []

    # Normalizing observations
    vect_wrappers.append(dict(name="digideep.environment.wrappers.normalizers.VecNormalizeObsDict",
                              args={"paths":[cpanel["observation_key"]],
                                    "clip":10,
                                    "epsilon":1e-8
                              },
                              enabled=False))
    # Normalizing rewards
    vect_wrappers.append(dict(name="digideep.environment.wrappers.normalizers.VecNormalizeRew",
                              args={"clip":10,
                                    "gamma":cpanel["gamma"],
                                    "epsilon":1e-8
                              },
                              enabled=False))
    ##############################################
    params["env"]["main_wrappers"] = {"Monitor":{"allow_early_resets":True, # We need it to allow early resets in the test environment.
                                                 "reset_keywords":(),
                                                 "info_keywords":()},
                                      "WrapperDummyMultiAgent":{"agent_name":"agent"},
                                      "WrapperDummyDictObs":{"observation_key":"agent"}
                                     }
    params["env"]["norm_wrappers"] = norm_wrappers
    params["env"]["vect_wrappers"] = vect_wrappers


    menv = MakeEnvironment(session=None, mode=None, seed=1, **params["env"])
    params["env"]["config"] = menv.get_config()

    # Some parameters
    # params["env"]["gamma"] = 1-1/params["env"]["config"]["max_steps"] # 0.98



    #####################################
    # Runner: [episode < cycle < epoch] #
    #####################################
    params["runner"] = {}
    params["runner"]["name"] = cpanel.get("runner_name", "digideep.pipeline.Runner")
    params["runner"]["max_time"] = cpanel.get("max_exec_time", None)
    params["runner"]["max_iter"] = cpanel.get("max_exec_iter", None)
    params["runner"]["n_cycles"] = cpanel["epoch_size"]    # Meaning that 100 cycles are 1 epoch.
    params["runner"]["n_epochs"] = cpanel["number_epochs"] # Testing and savings are done after each epoch.
    params["runner"]["randargs"] = {'seed':cpanel["seed"], 'cuda_deterministic':cpanel["cuda_deterministic"]}
    params["runner"]["test_act"] = cpanel["test_activate"] # Test Activate
    params["runner"]["test_int"] = cpanel["test_interval"] # Test Interval
    params["runner"]["save_int"] = cpanel["save_interval"] # Save Interval

    # We "save" after each epoch is done.
    # We "test" after each epoch is done.


    
    params["agents"] = {}
    ##############################################
    ### Agent (#1) ###
    ##################
    params["agents"]["agent"] = {}
    params["agents"]["agent"]["name"] = "agent"
    params["agents"]["agent"]["type"] = cpanel["agent_type"]
    params["agents"]["agent"]["observation_path"] = cpanel["observation_key"]
    params["agents"]["agent"]["methodargs"] = {}
    params["agents"]["agent"]["methodargs"]["n_update"] = cpanel["n_update"]  # Number of times to perform PPO update. Alternative name: PPO_EPOCH
    params["agents"]["agent"]["methodargs"]["gamma"] = cpanel["gamma"]  # Discount factor Gamma
    
    # params["agents"]["agent"]["methodargs"]["clamp_return"] = 1/(1-float(cpanel["gamma"]))
    # print("Clip Return =", params["agents"]["agent"]["methodargs"]["clamp_return"])

    params["agents"]["agent"]["methodargs"]["mean_lambda"] = cpanel["mean_lambda"]
    params["agents"]["agent"]["methodargs"]["std_lambda"] = cpanel["std_lambda"]
    params["agents"]["agent"]["methodargs"]["z_lambda"] = cpanel["z_lambda"]

    ################
    params["agents"]["agent"]["sampler_list"] = ["digideep.agent.ddpg.sampler.sampler_re"]
    params["agents"]["agent"]["sampler_args"] = {"agent_name":params["agents"]["agent"]["name"],
                                                 "batch_size":cpanel["batch_size"],
                                                 "observation_path":params["agents"]["agent"]["observation_path"]
                                                }

    # # It deletes the last element from the chunk
    # params["agents"]["agent"]["sampler"]["truncate_datalists"] = {"n":1} # MUST be 1 to truncate last item: (T+1 --> T)

    #############
    ### Model ###
    #############
    agent_name = params["agents"]["agent"]["name"]
    observation_path = params["agents"]["agent"]["observation_path"]
    params["agents"]["agent"]["policyname"] = "digideep.agent.sac.Policy"
    params["agents"]["agent"]["policyargs"] = {"obs_space": params["env"]["config"]["observation_space"][observation_path],
                                               "act_space": params["env"]["config"]["action_space"][agent_name],
                                               "hidden_size": 256,
                                               "value_args": {"init_w":0.003},
                                               "softq_args": {"init_w":0.003},
                                               "actor_args": {"init_w":0.003, "log_std_min":-20, "log_std_max":2},
                                               "average_args": {"mode":"soft", "polyak_factor":cpanel["polyak_factor"]},
                                            #    # {"mode":"hard", "interval":10000}
                                               }
    
    # lim = params["env"]["config"]["action_space"][agent_name]["lim"][1][0]
    # # params["agents"]["agent"]["noisename"] = "digideep.agent.noises.EGreedyNoise"
    # # params["agents"]["agent"]["noiseargs"] = {"std":cpanel["noise_std"], "e":0.3, "lim": lim}
    
    # params["agents"]["agent"]["noisename"] = "digideep.agent.noises.OrnsteinUhlenbeckNoise"
    # params["agents"]["agent"]["noiseargs"] = {"mu":0, "theta":0.15, "sigma":cpanel["noise_std"], "lim":lim}
    # # params["agents"]["agent"]["noiseargs"] = {"mu":0, "theta":0.15, "sigma":1}

    params["agents"]["agent"]["optimname_value"] = "torch.optim.Adam"
    params["agents"]["agent"]["optimargs_value"] = {"lr":cpanel["lr_value"]}   # , "eps":cpanel["eps"]

    params["agents"]["agent"]["optimname_softq"] = "torch.optim.Adam"
    params["agents"]["agent"]["optimargs_softq"] = {"lr":cpanel["lr_softq"]}   # , "eps":cpanel["eps"]

    params["agents"]["agent"]["optimname_actor"] = "torch.optim.Adam"
    params["agents"]["agent"]["optimargs_actor"] = {"lr":cpanel["lr_actor"]}   # , "eps":cpanel["eps"]

    # # RMSprop optimizer alpha
    # # params["agents"]["agent"]["optimargs"] = {"lr":1e-2, "alpha":0.99, "eps":1e-5, "weight_decay":0, "momentum":0, "centered":False}
    ##############################################


    ##############################################
    ### Memory ###
    ##############
    params["memory"] = {}
    params["memory"]["train"] = {}
    params["memory"]["train"]["type"] = "digideep.memory.ringbuffer.Memory"
    params["memory"]["train"]["args"] = {"name":"train",
                                         "keep_old_checkpoints":cpanel.get("keep_old_checkpoints", False),
                                         "chunk_sample_len":cpanel["n_steps"],
                                         "buffer_chunk_len":cpanel["memory_size_in_chunks"],
                                         "overrun":1}
    ##############################################

    
    
    ##############################################
    ### Explorer ###
    ################
    params["explorer"] = {}

    params["explorer"]["train"] = {}
    params["explorer"]["train"]["mode"] = "train"
    params["explorer"]["train"]["env"] = params["env"]
    params["explorer"]["train"]["do_reset"] = False
    params["explorer"]["train"]["final_action"] = False
    params["explorer"]["train"]["warm_start"] = cpanel["warm_start"] # In less than "warm_start" steps the agent will take random actions. 
    params["explorer"]["train"]["num_workers"] = cpanel["num_workers"]
    params["explorer"]["train"]["deterministic"] = False # MUST: Takes random actions
    params["explorer"]["train"]["n_steps"] = cpanel["n_steps"] # Number of steps to take a step in the environment
    params["explorer"]["train"]["n_episodes"] = None # Do not limit # of episodes
    params["explorer"]["train"]["win_size"] = 20 # Number of episodes to episode reward for report
    params["explorer"]["train"]["render"] = False
    params["explorer"]["train"]["render_delay"] = 0
    params["explorer"]["train"]["seed"] = cpanel["seed"] + 90
    params["explorer"]["train"]["extra_env_kwargs"] = {}

    params["explorer"]["test"] = {}
    params["explorer"]["test"]["mode"] = "test"
    params["explorer"]["test"]["env"] = params["env"]
    params["explorer"]["test"]["do_reset"] = True
    params["explorer"]["test"]["final_action"] = False
    params["explorer"]["test"]["warm_start"] = 0
    params["explorer"]["test"]["num_workers"] = cpanel["num_workers"] # We can use the same amount of workers for testing!
    params["explorer"]["test"]["deterministic"] = True   # MUST: Takes the best action
    params["explorer"]["test"]["n_steps"] = None # Do not limit # of steps
    params["explorer"]["test"]["n_episodes"] = cpanel["test_win_size"]
    params["explorer"]["test"]["win_size"] = cpanel["test_win_size"] # Extra episodes won't be counted
    params["explorer"]["test"]["render"] = False
    params["explorer"]["test"]["render_delay"] = 0
    params["explorer"]["test"]["seed"] = cpanel["seed"] + 100 # We want to make the seed of test environments different from training.
    params["explorer"]["test"]["extra_env_kwargs"] = {}

    params["explorer"]["eval"] = {}
    params["explorer"]["eval"]["mode"] = "eval"
    params["explorer"]["eval"]["env"] = params["env"]
    params["explorer"]["eval"]["do_reset"] = False
    params["explorer"]["eval"]["final_action"] = False
    params["explorer"]["eval"]["warm_start"] = 0
    params["explorer"]["eval"]["num_workers"] = 1
    params["explorer"]["eval"]["deterministic"] = True   # MUST: Takes the best action
    params["explorer"]["eval"]["n_steps"] = None # Do not limit # of steps
    params["explorer"]["eval"]["n_episodes"] = 1
    params["explorer"]["eval"]["win_size"] = -1
    params["explorer"]["eval"]["render"] = True
    params["explorer"]["eval"]["render_delay"] = 0
    params["explorer"]["eval"]["seed"] = cpanel["seed"] + 101 # We want to make the seed of eval environment different from test/train.
    params["explorer"]["eval"]["extra_env_kwargs"] = {}
    ##############################################

    return params
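
The "enabled" flags above act as switches on the wrapper spec lists. A minimal sketch, under the assumption that MakeEnvironment applies only the specs whose "enabled" flag is True, in list order; this helper is illustrative and not the library's code.

def enabled_wrappers(wrapper_specs):
    """Return (name, args) pairs for the wrapper specs that are switched on."""
    return [(w["name"], w["args"]) for w in wrapper_specs if w.get("enabled", False)]

# For the lists built in this example only WrapperNormalizeActDict is enabled, so
# enabled_wrappers(norm_wrappers) yields a single entry and
# enabled_wrappers(vect_wrappers) yields an empty list.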
Example #4
def gen_params(cpanel):
    params = {}
    # Environment
    params["env"] = {}
    params["env"]["name"] = cpanel["model_name"]

    # Other possible modules: roboschool | pybullet_envs
    params["env"]["from_module"] = cpanel.get("from_module", '')
    params["env"]["from_params"] = cpanel.get("from_params", False)

    ##############################################
    ### Normal Wrappers ###
    #######################
    norm_wrappers = []

    # Converting observation to 1 level
    norm_wrappers.append(
        dict(name="digideep.environment.wrappers.normal.WrapperLevelDictObs",
             args={"path": cpanel["observation_key"]},
             enabled=False))
    # Normalizing actions (to be in [-1, 1])
    norm_wrappers.append(
        dict(name="digideep.environment.wrappers.normalizers.WrapperNormalizeActDict",
             args={"paths": ["agent"]},
             enabled=False))
    ##############################################
    ### Vector Wrappers ###
    #######################
    vect_wrappers = []

    # Normalizing observations
    vect_wrappers.append(
        dict(name="digideep.environment.wrappers.normalizers.VecNormalizeObsDict",
             args={
                 "paths": [cpanel["observation_key"]],
                 "clip": 10,
                 "epsilon": 1e-8
             },
             enabled=True))
    # Normalizing rewards
    vect_wrappers.append(
        dict(name="digideep.environment.wrappers.normalizers.VecNormalizeRew",
             args={
                 "clip": 10,
                 "gamma": cpanel["gamma"],
                 "epsilon": 1e-8
             },
             enabled=True))
    ##############################################
    params["env"]["main_wrappers"] = {
        "Monitor": {
            "allow_early_resets":
            True,  # We need it to allow early resets in the test environment.
            "reset_keywords": (),
            "info_keywords": ()
        },
        "WrapperDummyMultiAgent": {
            "agent_name": "agent"
        },
        "WrapperDummyDictObs": {
            "observation_key": "agent"
        }
    }
    params["env"]["norm_wrappers"] = norm_wrappers
    params["env"]["vect_wrappers"] = vect_wrappers

    menv = MakeEnvironment(session=None, mode=None, seed=1, **params["env"])
    params["env"]["config"] = menv.get_config()

    # Some parameters
    # params["env"]["gamma"] = 1-1/params["env"]["config"]["max_steps"] # 0.98

    #####################################
    # Runner: [episode < cycle < epoch] #
    #####################################
    params["runner"] = {}
    params["runner"]["name"] = cpanel.get("runner_name",
                                          "digideep.pipeline.Runner")
    params["runner"]["max_time"] = cpanel.get("max_exec_time", None)
    params["runner"]["max_iter"] = cpanel.get("max_exec_iter", None)
    params["runner"]["n_cycles"] = cpanel[
        "epoch_size"]  # Meaning that 100 cycles are 1 epoch.
    params["runner"]["n_epochs"] = cpanel[
        "number_epochs"]  # Testing and savings are done after each epoch.
    params["runner"]["randargs"] = {
        'seed': cpanel["seed"],
        'cuda_deterministic': cpanel["cuda_deterministic"]
    }
    params["runner"]["test_act"] = cpanel["test_activate"]  # Test Activate
    params["runner"]["test_int"] = cpanel["test_interval"]  # Test Interval
    params["runner"]["save_int"] = cpanel["save_interval"]  # Save Interval

    # We "save" after each epoch is done.
    # We "test" after each epoch is done.

    params["agents"] = {}
    ##############################################
    ### Agent (#1) ###
    ##################
    params["agents"]["agent"] = {}
    params["agents"]["agent"]["name"] = "agent"
    params["agents"]["agent"]["type"] = cpanel["agent_type"]
    params["agents"]["agent"]["observation_path"] = cpanel["observation_key"]
    params["agents"]["agent"]["methodargs"] = {}
    params["agents"]["agent"]["methodargs"]["n_steps"] = cpanel[
        "n_steps"]  # Same as "num_steps" / T
    params["agents"]["agent"]["methodargs"]["n_update"] = cpanel[
        "n_update"]  # Number of times to perform PPO update. Alternative name: PPO_EPOCH
    params["agents"]["agent"]["methodargs"]["clip_param"] = cpanel[
        "clip_param"]  # PPO clip parameter
    params["agents"]["agent"]["methodargs"]["value_loss_coef"] = cpanel[
        "value_loss_coef"]  # Value loss coefficient
    params["agents"]["agent"]["methodargs"]["entropy_coef"] = cpanel[
        "entropy_coef"]  # Entropy term coefficient
    params["agents"]["agent"]["methodargs"]["max_grad_norm"] = cpanel[
        "max_grad_norm"]  # Max norm of gradients
    params["agents"]["agent"]["methodargs"]["use_clipped_value_loss"] = cpanel[
        "use_clipped_value_loss"]

    params["agents"]["agent"]["sampler"] = {}
    params["agents"]["agent"]["sampler"]["agent_name"] = params["agents"][
        "agent"]["name"]
    params["agents"]["agent"]["sampler"]["num_mini_batches"] = cpanel[
        "num_mini_batches"]
    params["agents"]["agent"]["sampler"]["compute_advantages"] = {
        "gamma": cpanel["gamma"],  # Discount factor for rewards
        "tau": cpanel["tau"],  # GAE parameter
        "use_gae": cpanel["use_gae"]
    }
    # It deletes the last element from the chunk
    params["agents"]["agent"]["sampler"]["truncate_datalists"] = {
        "n": 1
    }  # MUST be 1 to truncate last item: (T+1 --> T)
    params["agents"]["agent"]["sampler"]["observation_path"] = params[
        "agents"]["agent"]["observation_path"]

    #############
    ### Model ###
    #############
    agent_name = params["agents"]["agent"]["name"]
    observation_path = params["agents"]["agent"]["observation_path"]
    params["agents"]["agent"]["policyname"] = "digideep.agent.ppo.Policy"
    params["agents"]["agent"]["policyargs"] = {
        "obs_space":
        params["env"]["config"]["observation_space"][observation_path],
        "act_space": params["env"]["config"]["action_space"][agent_name],
        "modelname": "digideep.model.models.MLPModel",
        "modelargs": {
            "recurrent": cpanel["recurrent"],
            "output_size": cpanel["actor_feature_size"]
        }
    }
    params["agents"]["agent"]["optimname"] = "torch.optim.Adam"
    params["agents"]["agent"]["optimargs"] = {
        "lr": cpanel["lr"],
        "eps": cpanel["eps"]
    }

    # RMSprop optimizer alpha
    # params["agents"]["agent"]["optimargs"] = {"lr":1e-2, "alpha":0.99, "eps":1e-5, "weight_decay":0, "momentum":0, "centered":False}
    ##############################################

    ##############################################
    ### Memory ###
    ##############
    params["memory"] = {}

    params["memory"]["train"] = {}
    params["memory"]["train"]["type"] = "digideep.memory.rollbuffer.Memory"
    params["memory"]["train"]["args"] = {
        "name": "train",
        "chunk_sample_len": cpanel["n_steps"],
        "buffer_chunk_len": cpanel["memory_size_in_chunks"],
        "overrun": 1
    }
    ##############################################

    ##############################################
    ### Explorer ###
    ################
    params["explorer"] = {}

    params["explorer"]["train"] = {}
    params["explorer"]["train"]["mode"] = "train"
    params["explorer"]["train"]["env"] = params["env"]
    params["explorer"]["train"]["do_reset"] = False
    params["explorer"]["train"]["final_action"] = True
    params["explorer"]["train"]["warm_start"] = cpanel[
        "warm_start"]  # In less than "warm_start" steps the agent will take random actions.
    params["explorer"]["train"]["num_workers"] = cpanel["num_workers"]
    params["explorer"]["train"][
        "deterministic"] = False  # MUST: Takes random actions
    params["explorer"]["train"]["n_steps"] = cpanel[
        "n_steps"]  # Number of steps to take a step in the environment
    params["explorer"]["train"][
        "n_episodes"] = None  # Do not limit # of episodes
    params["explorer"]["train"][
        "win_size"] = 10  # Number of episodes to episode reward for report
    params["explorer"]["train"]["render"] = False
    params["explorer"]["train"]["render_delay"] = 0
    params["explorer"]["train"]["seed"] = cpanel["seed"]  # + 3500
    params["explorer"]["train"]["extra_env_kwargs"] = {}

    params["explorer"]["test"] = {}
    params["explorer"]["test"]["mode"] = "test"
    params["explorer"]["test"]["env"] = params["env"]
    params["explorer"]["test"]["do_reset"] = False
    params["explorer"]["test"]["final_action"] = False
    params["explorer"]["test"]["warm_start"] = 0
    params["explorer"]["test"]["num_workers"] = cpanel[
        "num_workers"]  # We can use the same amount of workers for testing!
    params["explorer"]["test"][
        "deterministic"] = True  # MUST: Takes the best action
    params["explorer"]["test"]["n_steps"] = None  # Do not limit # of steps
    params["explorer"]["test"]["n_episodes"] = cpanel["test_win_size"]
    params["explorer"]["test"]["win_size"] = cpanel[
        "test_win_size"]  # Extra episodes won't be counted
    params["explorer"]["test"]["render"] = False
    params["explorer"]["test"]["render_delay"] = 0
    params["explorer"]["test"]["seed"] = cpanel[
        "seed"] + 100  # We want to make the seed of test environments different from training.
    params["explorer"]["test"]["extra_env_kwargs"] = {}

    params["explorer"]["eval"] = {}
    params["explorer"]["eval"]["mode"] = "eval"
    params["explorer"]["eval"]["env"] = params["env"]
    params["explorer"]["eval"]["do_reset"] = False
    params["explorer"]["eval"]["final_action"] = False
    params["explorer"]["eval"]["warm_start"] = 0
    params["explorer"]["eval"]["num_workers"] = 1
    params["explorer"]["eval"][
        "deterministic"] = True  # MUST: Takes the best action
    params["explorer"]["eval"]["n_steps"] = None  # Do not limit # of steps
    params["explorer"]["eval"]["n_episodes"] = 1
    params["explorer"]["eval"]["win_size"] = -1
    params["explorer"]["eval"]["render"] = True
    params["explorer"]["eval"]["render_delay"] = 0
    params["explorer"]["eval"]["seed"] = cpanel[
        "seed"] + 101  # We want to make the seed of eval environment different from test/train.
    params["explorer"]["eval"]["extra_env_kwargs"] = {}
    ##############################################

    return params
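
A rough back-of-the-envelope on the runner schedule, under the assumption (suggested by the comments above) that each training cycle collects cpanel["n_steps"] steps from each of cpanel["num_workers"] workers and that an epoch is cpanel["epoch_size"] cycles; this helper is illustrative and not part of the source.

def approx_env_steps_per_epoch(cpanel):
    """Rough estimate of environment steps gathered by the train explorer per epoch."""
    return cpanel["n_steps"] * cpanel["num_workers"] * cpanel["epoch_size"]

# e.g. n_steps=128, num_workers=8, epoch_size=100 -> 102,400 steps per epoch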
Example #5
def gen_params(cpanel):
    params = {}
    # Environment
    params["env"] = {}
    params["env"]["name"]   = cpanel["model_name"]
    
    params["env"]["from_module"] = cpanel.get("from_module", '')
    params["env"]["from_params"] = cpanel.get("from_params", False)

    if params["env"]["from_params"]:
        # For having environment from parameters
        from digideep.environment.dmc2gym.registration import EnvCreator
        from dextron.zoo.hand_env.hand import grasp

        task_kwargs = {"generator":{"time_scale_offset":cpanel["time_scale_offset"],
                                    "time_scale_factor":cpanel["time_scale_factor"],
                                    "time_noise_factor":cpanel["time_noise_factor"]},
                       "random":None,
                       "pub_cameras":cpanel["pub_cameras"]} # "teaching_rate":cpanel["teaching_rate"]
        
        # visualize_reward=True
        environment_kwargs = {"time_limit":cpanel["time_limit"], "control_timestep":0.02}
        params["env"]["register_args"] = {"id":cpanel["model_name"],
                                          "entry_point":"digideep.environment.dmc2gym.wrapper:DmControlWrapper",
                                          "kwargs":{'dmcenv_creator':EnvCreator(grasp,
                                                                                task_kwargs=task_kwargs,
                                                                                environment_kwargs=environment_kwargs,
                                                                                visualize_reward=False),
                                                    'flat_observation':False,
                                                    'observation_key':"agent"}
                                         }

    ##############################################
    ### Normal Wrappers ###
    #######################
    norm_wrappers = []

    # Converting observation to 1 level
    norm_wrappers.append(dict(name="digideep.environment.wrappers.normal.WrapperLevelDictObs",
                              args={"path":cpanel["observation_key"],
                              },
                              enabled=True))
    # norm_wrappers.append(dict(name="digideep.environment.wrappers.normal.WrapperTransposeImage",
    #                           args={"path":"/camera"
    #                           },
    #                           enabled=True))
    # Normalizing actions (to be in [-1, 1])
    norm_wrappers.append(dict(name="digideep.environment.wrappers.normalizers.WrapperNormalizeActDict",
                              args={"paths":["agent"]},
                              enabled=False))

    ##############################################
    ### Vector Wrappers ###
    #######################
    vect_wrappers = []

    if cpanel["pub_cameras"]:
        vect_wrappers.append(dict(name="digideep.environment.wrappers.vector.VecFrameStackAxis",
                                args={"path":"/camera",
                                        "nstack":4, # By DQN Nature paper, it is called: phi length
                                        "axis":0},  # Axis=0 is required when ImageTransposeWrapper is called on the Atari games.
                                enabled=True))
    # Normalizing observations
    vect_wrappers.append(dict(name="digideep.environment.wrappers.normalizers.VecNormalizeObsDict",
                              args={"paths":[cpanel["observation_key"]],
                                    "clip":5, # 10
                                    "epsilon":1e-8
                              },
                              enabled=True))
    # Normalizing rewards
    vect_wrappers.append(dict(name="digideep.environment.wrappers.normalizers.VecNormalizeRew",
                              args={"clip":5, # 10
                                    "gamma":cpanel["gamma"],
                                    "epsilon":1e-8
                              },
                              enabled=True)) # Not a good idea to normalize sparse rewards.
    ##############################################
    params["env"]["main_wrappers"] = {"Monitor":{"allow_early_resets":True, # We need it to allow early resets in the test environment.
                                                 "reset_keywords":(),
                                                 "info_keywords":()},
                                      "WrapperDummyMultiAgent":{"agent_name":"agent"},
                                      "WrapperDummyDictObs":{"observation_key":"agent"}
                                     }
    params["env"]["norm_wrappers"] = norm_wrappers
    params["env"]["vect_wrappers"] = vect_wrappers


    menv = MakeEnvironment(session=None, mode=None, seed=1, **params["env"])
    params["env"]["config"] = menv.get_config()

    # Some parameters
    # params["env"]["gamma"] = 1-1/params["env"]["config"]["max_steps"] # 0.98



    #####################################
    # Runner: [episode < cycle < epoch] #
    #####################################
    params["runner"] = {}
    params["runner"]["name"] = cpanel.get("runner_name", "digideep.pipeline.Runner")
    params["runner"]["n_cycles"] = cpanel["epoch_size"]    # Meaning that 100 cycles are 1 epoch.
    params["runner"]["n_epochs"] = cpanel["number_epochs"] # Testing and savings are done after each epoch.
    params["runner"]["randargs"] = {'seed':cpanel["seed"], 'cuda_deterministic':cpanel["cuda_deterministic"]}
    params["runner"]["test_act"] = cpanel["test_activate"] # Test Activate
    params["runner"]["test_int"] = cpanel["test_interval"] # Test Interval
    params["runner"]["save_int"] = cpanel["save_interval"] # Save Interval

    # We "save" after each epoch is done.
    # We "test" after each epoch is done.


    
    params["agents"] = {}
    ##############################################
    ### Agent (#1) ### Soft Actor-Critic
    ##################
    params["agents"]["agent"] = {}
    params["agents"]["agent"]["name"] = "agent"
    params["agents"]["agent"]["type"] = "dextron.agent.sac.Agent" # "digideep.agent.sac.Agent"
    params["agents"]["agent"]["observation_path"] = "/camera"     # cpanel["observation_key"]
    params["agents"]["agent"]["methodargs"] = {}
    params["agents"]["agent"]["methodargs"]["n_update"] = cpanel["n_update"]  # Number of times to perform PPO update. Alternative name: PPO_EPOCH
    params["agents"]["agent"]["methodargs"]["gamma"] = cpanel["gamma"]  # Discount factor Gamma
    
    # params["agents"]["agent"]["methodargs"]["clamp_return"] = 1/(1-float(cpanel["gamma"]))
    # print("Clip Return =", params["agents"]["agent"]["methodargs"]["clamp_return"])

    params["agents"]["agent"]["methodargs"]["mean_lambda"] = cpanel["mean_lambda"]
    params["agents"]["agent"]["methodargs"]["std_lambda"] = cpanel["std_lambda"]
    params["agents"]["agent"]["methodargs"]["z_lambda"] = cpanel["z_lambda"]

    ################
    demo_batch_size = int(cpanel["demo_use_ratio"] * cpanel["batch_size"])
    train_batch_size  = cpanel["batch_size"] - demo_batch_size
    
    params["agents"]["agent"]["sampler_list"] = ["dextron.agent.sac.multi_sampler.multi_memory_sample"]
    params["agents"]["agent"]["sampler_args"] = {"agent_name":params["agents"]["agent"]["name"],
                                                 "batch_size":cpanel["batch_size"],
                                                 "scheduler_start":cpanel["scheduler_start"],
                                                 "scheduler_steps":cpanel["scheduler_steps"],
                                                 "scheduler_decay":cpanel["scheduler_decay"],
                                                 "batch_size_dict":{"train":train_batch_size, "demo":demo_batch_size},
                                                 "observation_path":params["agents"]["agent"]["observation_path"]
                                                }

#     replay_batch_size = int(cpanel["replay_use_ratio"] * cpanel["batch_size"])
#     train_batch_size  = cpanel["batch_size"] - replay_batch_size
#
#     params["agents"]["agent"]["sampler_list"] = ["dextron.agent.sac.multi_sampler.multi_memory_sample"]
#     params["agents"]["agent"]["sampler_args"] = {"agent_name": params["agents"]["agent"]["name"],
#                                                  "batch_size_dict": {"train":train_batch_size, "replay":replay_batch_size},
#                                                  "observation_path": params["agents"]["agent"]["observation_path"]
#                                                 }

    # # It deletes the last element from the chunk
    # params["agents"]["agent"]["sampler"]["truncate_datalists"] = {"n":1} # MUST be 1 to truncate last item: (T+1 --> T)

    #############
    ### Model ###
    #############
    agent_name = params["agents"]["agent"]["name"]
    observation_path = params["agents"]["agent"]["observation_path"]
    # params["agents"]["agent"]["policyname"] = "digideep.agent.sac.Policy"
    params["agents"]["agent"]["policyargs"] = {"obs_space": params["env"]["config"]["observation_space"][observation_path],
                                               "act_space": params["env"]["config"]["action_space"][agent_name],
                                               "image_repr_size": 64,
                                               "hidden_size": 256,
                                               "value_args": {"init_w":0.003},
                                               "softq_args": {"init_w":0.003},
                                               "actor_args": {"init_w":0.003, "log_std_min":-20, "log_std_max":2},
                                               "average_args": {"mode":"soft", "polyak_factor":cpanel["polyak_factor"]},
                                                # # {"mode":"hard", "interval":10000}
                                               }
    
    # lim = params["env"]["config"]["action_space"][agent_name]["lim"][1][0]
    # # params["agents"]["agent"]["noisename"] = "digideep.agent.noises.EGreedyNoise"
    # # params["agents"]["agent"]["noiseargs"] = {"std":cpanel["noise_std"], "e":0.3, "lim": lim}
    
    # params["agents"]["agent"]["noisename"] = "digideep.agent.noises.OrnsteinUhlenbeckNoise"
    # params["agents"]["agent"]["noiseargs"] = {"mu":0, "theta":0.15, "sigma":cpanel["noise_std"], "lim":lim}
    # # params["agents"]["agent"]["noiseargs"] = {"mu":0, "theta":0.15, "sigma":1}

    params["agents"]["agent"]["optimname_value"] = "torch.optim.Adam"
    params["agents"]["agent"]["optimargs_value"] = {"lr":cpanel["lr_value"]}   # , "eps":cpanel["eps"]

    params["agents"]["agent"]["optimname_softq"] = "torch.optim.Adam"
    params["agents"]["agent"]["optimargs_softq"] = {"lr":cpanel["lr_softq"]}   # , "eps":cpanel["eps"]

    params["agents"]["agent"]["optimname_actor"] = "torch.optim.Adam"
    params["agents"]["agent"]["optimargs_actor"] = {"lr":cpanel["lr_actor"]}   # , "eps":cpanel["eps"]


    # # RMSprop optimizer alpha
    # # params["agents"]["agent"]["optimargs"] = {"lr":1e-2, "alpha":0.99, "eps":1e-5, "weight_decay":0, "momentum":0, "centered":False}
    ##############################################

    
    
    ##############################################
    ### Agent (#2) ### Demonstrator
    ##################
    params["agents"]["demonstrator"] = {}
    params["agents"]["demonstrator"]["name"] = "demonstrator"
    params["agents"]["demonstrator"]["type"] = "dextron.agent.demonstrator.NaiveController"
    params["agents"]["demonstrator"]["methodargs"] = {}
    agent_name = params["agents"]["demonstrator"]["name"]
    params["agents"]["demonstrator"]["methodargs"]["act_space"] = params["env"]["config"]["action_space"][agent_name]
    ##############################################



    ##############################################
    ### Memory ###
    ##############
    params["memory"] = {}

    # TODO: The memory size in chunks should be proportionately distributed. We think that "demo" should have a
    #       smaller memory size.

    # "digideep.memory.generic.Memory" | "digideep.memory.ringbuffer.Memory"
    # chunk_sample_len: Number of samples in a chunk
    # buffer_chunk_len: Number of chunks in the buffer
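    # Total capacity of each buffer is therefore chunk_sample_len * buffer_chunk_len samples.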

    params["memory"]["train"] = {}
    params["memory"]["train"]["type"] = "digideep.memory.ringbuffer.Memory"
    params["memory"]["train"]["args"] = {"chunk_sample_len":cpanel["n_steps"], "buffer_chunk_len":cpanel["memory_size_in_chunks"], "overrun":1}
    
    params["memory"]["demo"] = {}
    params["memory"]["demo"]["type"] = "digideep.memory.ringbuffer.Memory"
    params["memory"]["demo"]["args"] = {"chunk_sample_len":cpanel["n_steps"], "buffer_chunk_len":cpanel["demo_memory_size_in_chunks"], "overrun":1}

    # params["memory"]["replay"] = {}
    # params["memory"]["replay"]["type"] = "digideep.memory.ringbuffer.Memory"
    # params["memory"]["replay"]["args"] = {"chunk_sample_len":cpanel["replay_nsteps"], "buffer_chunk_len":cpanel["memory_size_in_chunks"]}
    ##############################################

    
    
    ##############################################
    ### Explorer ###
    ################
    params["explorer"] = {}

    params["explorer"]["train"] = {}
    params["explorer"]["train"]["mode"] = "train"
    params["explorer"]["train"]["env"] = params["env"]
    params["explorer"]["train"]["do_reset"] = False
    params["explorer"]["train"]["final_action"] = False
    params["explorer"]["train"]["warm_start"] = cpanel["warm_start"] # In less than "warm_start" steps the agent will take random actions. 
    params["explorer"]["train"]["num_workers"] = cpanel["num_workers"]
    params["explorer"]["train"]["deterministic"] = False # MUST: Takes random actions
    params["explorer"]["train"]["n_steps"] = cpanel["n_steps"] # Number of steps to take a step in the environment
    params["explorer"]["train"]["n_episodes"] = None # Do not limit # of episodes
    params["explorer"]["train"]["win_size"] = 20 # Number of episodes to episode reward for report
    params["explorer"]["train"]["render"] = False
    params["explorer"]["train"]["render_delay"] = 0
    params["explorer"]["train"]["seed"] = cpanel["seed"] + 90
    params["explorer"]["train"]["extra_env_kwargs"] = {"mode":params["explorer"]["train"]["mode"], "allow_demos":False}

    params["explorer"]["test"] = {}
    params["explorer"]["test"]["mode"] = "test"
    params["explorer"]["test"]["env"] = params["env"]
    params["explorer"]["test"]["do_reset"] = True
    params["explorer"]["test"]["final_action"] = False
    params["explorer"]["test"]["warm_start"] = 0
    params["explorer"]["test"]["num_workers"] = cpanel["num_workers"] # We can use the same amount of workers for testing!
    params["explorer"]["test"]["deterministic"] = True   # MUST: Takes the best action
    params["explorer"]["test"]["n_steps"] = None # Do not limit # of steps
    params["explorer"]["test"]["n_episodes"] = cpanel["test_win_size"]
    params["explorer"]["test"]["win_size"] = cpanel["test_win_size"] # Extra episodes won't be counted
    params["explorer"]["test"]["render"] = False
    params["explorer"]["test"]["render_delay"] = 0
    params["explorer"]["test"]["seed"] = cpanel["seed"] + 100 # We want to make the seed of test environments different from training.
    params["explorer"]["test"]["extra_env_kwargs"] = {"mode":params["explorer"]["test"]["mode"], "allow_demos":False}

    params["explorer"]["eval"] = {}
    params["explorer"]["eval"]["mode"] = "eval"
    params["explorer"]["eval"]["env"] = params["env"]
    params["explorer"]["eval"]["do_reset"] = False
    params["explorer"]["eval"]["final_action"] = False
    params["explorer"]["eval"]["warm_start"] = 0
    params["explorer"]["eval"]["num_workers"] = 1
    params["explorer"]["eval"]["deterministic"] = True   # MUST: Takes the best action
    params["explorer"]["eval"]["n_steps"] = None # Do not limit # of steps
    params["explorer"]["eval"]["n_episodes"] = 1
    params["explorer"]["eval"]["win_size"] = -1
    params["explorer"]["eval"]["render"] = True
    params["explorer"]["eval"]["render_delay"] = 0
    params["explorer"]["eval"]["seed"] = cpanel["seed"] + 101 # We want to make the seed of eval environment different from test/train.
    params["explorer"]["eval"]["extra_env_kwargs"] = {"mode":params["explorer"]["eval"]["mode"], "allow_demos":cpanel.get("allow_demos", False)}
    ##############################################

    params["explorer"]["demo"] = {}
    params["explorer"]["demo"]["mode"] = "demo"
    params["explorer"]["demo"]["env"] = params["env"]
    params["explorer"]["demo"]["do_reset"] = False
    params["explorer"]["demo"]["final_action"] = False
    params["explorer"]["demo"]["warm_start"] = 0
    params["explorer"]["demo"]["num_workers"] = cpanel["num_workers"]
    params["explorer"]["demo"]["deterministic"] = False # MUST: Takes random actions
    params["explorer"]["demo"]["n_steps"] = cpanel["n_steps"] # Number of steps to take a step in the environment
    params["explorer"]["demo"]["n_episodes"] = None
    params["explorer"]["demo"]["win_size"] = -1
    params["explorer"]["demo"]["render"] = False # True # False
    params["explorer"]["demo"]["render_delay"] = 0
    params["explorer"]["demo"]["seed"] = cpanel["seed"] + 50
    params["explorer"]["demo"]["extra_env_kwargs"] = {"mode":params["explorer"]["demo"]["mode"], "allow_demos":True}


#     params["explorer"]["replay"] = {}
#     params["explorer"]["replay"]["mode"] = "replay"
#     params["explorer"]["replay"]["env"] = params["env"]
#     params["explorer"]["replay"]["do_reset"] = False
#     params["explorer"]["replay"]["final_action"] = False
#     params["explorer"]["replay"]["warm_start"] = 0
#     params["explorer"]["replay"]["num_workers"] = cpanel["num_workers"]
#     params["explorer"]["replay"]["deterministic"] = False # MUST: Takes random actions
#     params["explorer"]["replay"]["n_steps"] = cpanel["replay_nsteps"] # Number of steps to take a step in the environment
#     params["explorer"]["replay"]["n_episodes"] = None
#     params["explorer"]["replay"]["win_size"] = 10
#     params["explorer"]["replay"]["render"] = False # False
#     params["explorer"]["replay"]["render_delay"] = 0
#     params["explorer"]["replay"]["seed"] = cpanel["seed"] + 50
#     params["explorer"]["replay"]["extra_env_kwargs"] = {"mode":params["explorer"]["replay"]["mode"], "allow_demos":False}

    return params
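
A concrete reading of the demonstration/training batch split computed above; batch_size=128 and demo_use_ratio=0.25 are illustrative placeholders, not values from the source.

batch_size = 128
demo_use_ratio = 0.25
demo_batch_size = int(demo_use_ratio * batch_size)   # 32 samples drawn from the "demo" memory
train_batch_size = batch_size - demo_batch_size      # 96 samples drawn from the "train" memory
assert demo_batch_size + train_batch_size == batch_size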