def make(env_name, *make_args, **make_kwargs):
    """Instantiate an environment by name.

    The two cart-pole swing-up variants are constructed directly; any
    other name is delegated to ``gym_make`` with the remaining arguments.
    """
    custom_envs = {
        "CartPoleSwingUp": CartPoleSwingUpEnv,
        "CartPoleSwingUpContinuous": CartPoleSwingUpContinuousEnv,
    }
    if env_name in custom_envs:
        return custom_envs[env_name]()
    return gym_make(env_name, *make_args, **make_kwargs)
def test(path, chkpt, config):
    """Evaluate a trained PPO agent restored from a stored checkpoint.

    While the configured workload runs in the background, the specified
    Gym environment is created and driven by the restored agent according
    to ``config['testconfig']``.  If ``'logpath'`` is present in the test
    configuration, the environment history and the configuration are
    saved under a per-checkpoint subfolder of that path.

    Parameters
    ----------
    path : str
        Path of the folder where the trained agent checkpoints are stored.
    chkpt : int
        Number of the training checkpoint to be used.
    config : dict
        General configuration of the test; must provide the keys
        ``'env'``, ``'work'``, ``'envconfig'``, ``'workconfig'`` and
        ``'testconfig'``.

    Returns
    -------
    history : dict
        Status history of the environment during the test, for each
        iteration and step (as produced by ``test_env``).
    """
    # NOTE(review): the original docstring also promised a
    # ``results`` frequency-count dict, but no such value is computed or
    # returned anywhere in the body — the documented contract above now
    # matches the actual single return value.
    import ray
    from ray.tune.registry import register_env

    env = config['env']
    work = config['work']

    ## REGISTER ENVIRONMENT
    Env = load_env_class(gym_spec(env).entry_point)
    register_env(env, lambda config: Env(**config))

    ## TRAINED AGENT RETRIEVAL
    ray.init(ignore_reinit_error=True)
    chkptpath = path + f"/checkpoint_{chkpt}/checkpoint-{chkpt}"
    agent = get_PPOagent(env, config['envconfig'], chkptpath)

    ## BACKGROUND WORKLOAD INITIALIZATION
    workers = start_work(work, config['workconfig'])

    ## TEST ENVIRONMENT WITH TRAINED AGENT
    testenv = gym_make(env, **config['envconfig'])
    history = test_env(testenv, agent, config['testconfig'])

    ## BACKGROUND WORKLOAD KILL
    end_work(workers)

    ## SAVE RESULTS AND STATUS HISTORY
    logpath = config['testconfig'].get('logpath', None)
    if logpath is not None:
        # Results for each checkpoint go to their own subfolder.
        logpath += f'/checkpoint-{chkpt}'
        generate_log(history, logpath)
        save_config(config, logpath)

    return history
def make_environment(env_name):
    """Create the named environment wrapped in the preprocessing stack."""
    # Order matters: frame skipping and fire-on-reset happen before the
    # pixel preprocessing, buffering and scaling wrappers.
    preprocessing = (
        MaxAndSkipEnv,
        FireResetEnv,
        ProcessFrame84,
        ImageToPyTorch,
    )
    wrapped = gym_make(env_name)
    for wrapper in preprocessing:
        wrapped = wrapper(wrapped)
    wrapped = BufferWrapper(wrapped, 4)
    return ScaledFloatFrame(wrapped)
def __init__(self, ctrl_other=None):
    """Wrap the Gym Pendulum-v0 environment with a second controller slot."""
    # Underlying environment; it must expose a 1-D action space.
    self.env = gym_make('Pendulum-v0')
    assert len(self.env.action_space.shape) == 1
    # Static controller for the other agent (may remain None).
    self.other_controller = ctrl_other
    self.t = 0.
    self.dt = self.env.unwrapped.dt
    self.state = self.reset()
    self.action_space = self.env.action_space
    self.max_torque = self.env.unwrapped.max_torque
    # Observation bounds: (angle, angular velocity) limited to (pi, 1).
    obs_limit = np.array([np.pi, 1])
    self.observation_space = Box(low=-obs_limit, high=obs_limit, dtype=np.float32)
def __init__(self, ctrl_other=None, max_torque=None, action_space_u1=None,
             action_space_u2=None, perfident=False):
    """Wrap Pendulum-v0 (step limit removed) for a two-controller setting."""
    # The trailing .env unwraps the TimeLimit so episodes can exceed 200 steps.
    self.env = gym_make('Pendulum-v0').env
    assert len(self.env.action_space.shape) == 1
    # Controller of the other agent; replaceable later via set_ctrl_other().
    self.other_controller = ctrl_other
    self.t = 0.
    self.dt = self.env.unwrapped.dt
    self.state = self.reset()
    self.action_space = self.env.action_space
    # Per-controller action limits default to independent copies of the
    # full action space.
    if action_space_u1 is None:
        action_space_u1 = deepcopy(self.action_space)
    self.action_space_u1 = action_space_u1
    if action_space_u2 is None:
        action_space_u2 = deepcopy(self.action_space)
    self.action_space_u2 = action_space_u2
    # Limit on the summed torque of both controllers.
    if max_torque is None:
        max_torque = self.env.unwrapped.max_torque
    self.max_torque = max_torque
    # Observation bounds: (angle, angular velocity) limited to (pi, 1).
    obs_limit = np.array([np.pi, 1])
    self.observation_space = Box(low=-obs_limit, high=obs_limit, dtype=np.float32)
    # Whether the other controller's input limit is incorporated.
    self.perfident = perfident
def make_with_gym():
    """Create a reproducibly-seeded BreakoutNoFrameskip-v4 environment."""
    # Seed both the environment itself and its action-space RNG so that
    # sampled actions are deterministic across runs.
    breakout = gym_make("BreakoutNoFrameskip-v4")
    breakout.seed(0)
    breakout.action_space.np_random.seed(0)
    return breakout