Esempio n. 1
0
def make(env_name, *make_args, **make_kwargs):
    """Create an environment by name.

    The two custom swing-up environments are constructed directly (any
    extra arguments are ignored for them, as before); every other name
    is forwarded to ``gym_make`` together with the remaining positional
    and keyword arguments.
    """
    custom_envs = {
        "CartPoleSwingUp": CartPoleSwingUpEnv,
        "CartPoleSwingUpContinuous": CartPoleSwingUpContinuousEnv,
    }
    env_cls = custom_envs.get(env_name)
    if env_cls is not None:
        return env_cls()
    return gym_make(env_name, *make_args, **make_kwargs)
Esempio n. 2
0
def test(path, chkpt, config):
    """Evaluate a trained PPO agent against a gym environment.

    Loads the PPO checkpoint found under *path*, starts the configured
    background workload, builds the gym environment named in *config*,
    and runs the test loop with the restored agent. If the test
    configuration contains a ``logpath``, the status history and the
    configuration are saved under a per-checkpoint subfolder.

    Parameters
    ----------
    path : str
        Folder where the trained agent checkpoints are stored.
    chkpt : int
        Number of the training checkpoint to restore.
    config : dict
        General configuration; must provide the keys ``env``, ``work``,
        ``envconfig``, ``workconfig`` and ``testconfig``.

    Returns
    -------
    history : dict
        Status history of the environment during the test, as produced
        by ``test_env``.
    """
    import ray
    from ray.tune.registry import register_env

    env_name = config['env']
    workload = config['work']

    ## REGISTER ENVIRONMENT
    env_cls = load_env_class(gym_spec(env_name).entry_point)
    register_env(env_name, lambda cfg: env_cls(**cfg))

    ## TRAINED AGENT RETRIEVAL
    # ignore_reinit_error lets this run inside an already-initialized Ray session.
    ray.init(ignore_reinit_error=True)

    checkpoint_path = f"{path}/checkpoint_{chkpt}/checkpoint-{chkpt}"
    agent = get_PPOagent(env_name, config['envconfig'], checkpoint_path)

    ## BACKGROUND WORKLOAD INITIALIZATION
    workers = start_work(workload, config['workconfig'])

    ## TEST ENVIRONMENT WITH TRAINED AGENT
    test_environment = gym_make(env_name, **config['envconfig'])
    history = test_env(test_environment, agent, config['testconfig'])

    ## BACKGROUND WORKLOAD KILL
    end_work(workers)

    ## SAVE RESULTS AND STATUS HISTORY
    logpath = config['testconfig'].get('logpath', None)
    if logpath is not None:
        logpath = f"{logpath}/checkpoint-{chkpt}"
        generate_log(history, logpath)
        save_config(config, logpath)

    return history
def make_environment(env_name):
    """Build the named environment wrapped in the standard preprocessing stack."""
    # Single-argument wrappers, applied innermost-first in this order.
    single_arg_wrappers = (
        MaxAndSkipEnv,
        FireResetEnv,
        ProcessFrame84,
        ImageToPyTorch,
    )
    env = gym_make(env_name)
    for wrapper in single_arg_wrappers:
        env = wrapper(env)
    # Stack 4 consecutive frames, then scale pixel values to floats.
    env = BufferWrapper(env, 4)
    return ScaledFloatFrame(env)
Esempio n. 4
0
    def __init__(self, ctrl_other=None):
        """Wrap Pendulum-v0 with an optional second (static) controller.

        Parameters
        ----------
        ctrl_other : optional
            Static controller supplying the other control input; may be None.
        """
        # Build the underlying gym environment; it must have a 1-D action space.
        self.env = gym_make('Pendulum-v0')
        assert len(self.env.action_space.shape) == 1

        self.other_controller = ctrl_other  # Static Controller for now
        self.t = 0.
        self.dt = self.env.unwrapped.dt
        # reset() may read the attributes set above, so it runs after them.
        self.state = self.reset()

        # Expose the wrapped env's action space and torque limit unchanged.
        self.action_space = self.env.action_space
        self.max_torque = self.env.unwrapped.max_torque

        # Symmetric observation bounds of [pi, 1].
        bound = np.array([np.pi, 1])
        self.observation_space = Box(low=-bound, high=bound, dtype=np.float32)
Esempio n. 5
0
    def __init__(self, ctrl_other=None, max_torque=None, action_space_u1=None, action_space_u2=None, perfident = False):
        """Wrap Pendulum-v0 for two-controller operation with configurable limits.

        Parameters
        ----------
        ctrl_other : optional
            The other controller; can be overwritten later via set_ctrl_other().
        max_torque : float, optional
            Limit on the summed control; defaults to the env's own max torque.
        action_space_u1, action_space_u2 : optional
            Per-controller action spaces; default to copies of the env's space.
        perfident : bool
            Whether the limit of u_other is incorporated.
        """
        # Unwrap once (.env) so episodes are not capped at 200 steps.
        self.env = gym_make('Pendulum-v0').env
        assert len(self.env.action_space.shape) == 1

        self.other_controller = ctrl_other
        self.t = 0.
        self.dt = self.env.unwrapped.dt
        # reset() may read the attributes set above, so it runs after them.
        self.state = self.reset()

        self.action_space = self.env.action_space
        # Deep-copied defaults so mutating one per-controller space cannot
        # affect the shared env action space.
        self.action_space_u1 = action_space_u1 if action_space_u1 is not None else deepcopy(self.action_space)  # limits u1
        self.action_space_u2 = action_space_u2 if action_space_u2 is not None else deepcopy(self.action_space)  # limits u2
        self.max_torque = max_torque if max_torque is not None else self.env.unwrapped.max_torque  # limits the summed u

        # Symmetric observation bounds of [pi, 1].
        bound = np.array([np.pi, 1])
        self.observation_space = Box(low=-bound, high=bound, dtype=np.float32)

        self.perfident = perfident  # if the limit of u_other is incorporated
 def make_with_gym():
     """Create a deterministically seeded BreakoutNoFrameskip-v4 environment."""
     breakout = gym_make("BreakoutNoFrameskip-v4")
     # Seed both the environment and its action-space RNG for reproducibility.
     breakout.seed(0)
     breakout.action_space.np_random.seed(0)
     return breakout