def make_env(env_name, seed=-1, render_mode=False):
    """Create the environment selected by the prefix of ``env_name``.

    The name is matched by prefix, most specific first:

    * ``"CartPoleSwingUpHarder..."`` -> ``CartPoleSwingUpHarderEnv``
    * ``"CartPoleSwingUp..."``       -> ``CartPoleSwingUpEnv``
    * ``"DreamCartPoleSwingUp..."``  -> ``DreamCartPoleSwingUpEnv``

    Each environment class is imported inside its own branch, so only the
    module backing the requested environment has to be importable.

    Args:
        env_name: Environment name; only its prefix is inspected.
        seed: When ``>= 0`` it is passed to ``env.seed``; a negative value
            (the default) leaves the environment unseeded.
        render_mode: Accepted for call compatibility; this function does
            not read it.

    Returns:
        The newly constructed environment instance.

    Raises:
        AssertionError: If ``env_name`` matches none of the known prefixes.
    """
    # "CartPoleSwingUpHarder" also starts with "CartPoleSwingUp", so the
    # longer prefix has to be tested first.
    if env_name.startswith("CartPoleSwingUpHarder"):
        print("cartpole_swingup_harder_started")
        from cartpole_swingup_harder import CartPoleSwingUpHarderEnv
        env = CartPoleSwingUpHarderEnv()
    elif env_name.startswith("CartPoleSwingUp"):
        print("cartpole_swingup_started")
        from cartpole_swingup import CartPoleSwingUpEnv
        env = CartPoleSwingUpEnv()
    elif env_name.startswith("DreamCartPoleSwingUp"):
        print("dream_cartpole_swingup_started")
        from dream import DreamCartPoleSwingUpEnv
        env = DreamCartPoleSwingUpEnv()
    else:
        # Raised explicitly rather than via ``assert False``: an assert is
        # stripped under ``python -O``, which would let execution fall
        # through to an unbound ``env``. The exception type and message are
        # the same as before.
        raise AssertionError("invalid environment name.")

    if seed >= 0:
        env.seed(seed)

    return env
return Model(5, 1, 16) def _evaluate_once(self, env, model): obs = env.reset() h = model.M.reset() rewards = 0 done = False while not done: action = model(obs, h, module="C") obs, reward, done, _ = env.step(action) h = model(obs, action, module="M") rewards += reward return rewards env = CartPoleSwingUpEnv() es = OpenaiES(len(Model(5, 1, 16)), sigma=args.sigma, stepsize=args.stepsize) global_best_fitness = -np.inf with CartPoleSwingUpEvaluator(args.num_workers, args.models_per_worker, args.precision) as evaluator: popsize = len(evaluator) for gen in range(args.num_gen): seeds, solutions = es.sample(popsize) fitness, success = evaluator.evaluate(seeds, solutions, args.num_evals) assert success, f"evaluation failed at generation {gen}" es.step(fitness) best_fitness = np.max(fitness) if best_fitness > global_best_fitness: print(
def _build_env(self):
    """Construct and return a new ``CartPoleSwingUpEnv`` instance."""
    env = CartPoleSwingUpEnv()
    return env
def make_env(name):
    """Return the environment identified by ``name``.

    The exact name ``'CartPoleSwingUp'`` yields a new ``CartPoleSwingUpEnv``;
    any other name is passed unchanged to ``gym.make``.
    """
    if name != 'CartPoleSwingUp':
        return gym.make(name)
    return CartPoleSwingUpEnv()