Beispiel #1
0
def make_env(env_name, seed=-1, render_mode=False):
    """Create a cart-pole swing-up environment selected by name prefix.

    The environment modules are imported lazily, so only the module for the
    requested environment needs to be importable.

    Args:
        env_name: Environment name, matched by prefix:
            "CartPoleSwingUpHarder..." -> CartPoleSwingUpHarderEnv,
            any other "CartPoleSwingUp..." -> CartPoleSwingUpEnv,
            "DreamCartPoleSwingUp..." -> DreamCartPoleSwingUpEnv.
        seed: When >= 0, forwarded to ``env.seed``; a negative value leaves
            the environment unseeded.
        render_mode: Accepted for interface compatibility; not used by this
            function.

    Returns:
        The newly constructed environment instance.

    Raises:
        AssertionError: If ``env_name`` matches none of the known prefixes.
    """
    # The more specific "Harder" prefix must be tested before the plain
    # "CartPoleSwingUp" prefix, which it also starts with.
    if env_name.startswith("CartPoleSwingUpHarder"):
        print("cartpole_swingup_harder_started")
        from cartpole_swingup_harder import CartPoleSwingUpHarderEnv
        env = CartPoleSwingUpHarderEnv()
    elif env_name.startswith("CartPoleSwingUp"):
        print("cartpole_swingup_started")
        from cartpole_swingup import CartPoleSwingUpEnv
        env = CartPoleSwingUpEnv()
    elif env_name.startswith("DreamCartPoleSwingUp"):
        print("dream_cartpole_swingup_started")
        from dream import DreamCartPoleSwingUpEnv
        env = DreamCartPoleSwingUpEnv()
    else:
        # Raised explicitly instead of `assert False`: assert statements are
        # removed under `python -O`, which would let an unknown name fall
        # through to an unbound `env`. The exception type and message are
        # kept so existing callers see the same error.
        raise AssertionError("invalid environment name.")

    if seed >= 0:
        env.seed(seed)
    return env
Beispiel #2
0
        return Model(5, 1, 16)

    def _evaluate_once(self, env, model):
        obs = env.reset()
        h = model.M.reset()
        rewards = 0
        done = False
        while not done:
            action = model(obs, h, module="C")
            obs, reward, done, _ = env.step(action)
            h = model(obs, action, module="M")
            rewards += reward
        return rewards


# Module-level environment instance; it is not referenced again in the visible
# part of this script.
env = CartPoleSwingUpEnv()
# Optimizer sized by len() of a Model(5, 1, 16) -- presumably the number of
# model parameters being searched (TODO confirm against Model.__len__).
es = OpenaiES(len(Model(5, 1, 16)), sigma=args.sigma, stepsize=args.stepsize)
# Start at -inf so any finite fitness compares greater on the first check.
global_best_fitness = -np.inf

with CartPoleSwingUpEvaluator(args.num_workers, args.models_per_worker,
                              args.precision) as evaluator:
    popsize = len(evaluator)
    for gen in range(args.num_gen):
        seeds, solutions = es.sample(popsize)
        fitness, success = evaluator.evaluate(seeds, solutions, args.num_evals)
        assert success, f"evaluation failed at generation {gen}"
        es.step(fitness)

        best_fitness = np.max(fitness)
        if best_fitness > global_best_fitness:
            print(
Beispiel #3
0
 def _build_env(self):
     """Create and return a new ``CartPoleSwingUpEnv`` built with no arguments."""
     swingup_env = CartPoleSwingUpEnv()
     return swingup_env
Beispiel #4
0
def make_env(name):
    """Build the environment called ``name``.

    The exact name 'CartPoleSwingUp' yields a new ``CartPoleSwingUpEnv``;
    every other name is handed to ``gym.make``.
    """
    wants_swingup = name == 'CartPoleSwingUp'
    if not wants_swingup:
        return gym.make(name)
    return CartPoleSwingUpEnv()