def run_parallel_episodes(arg):
    """Run one episode with a freshly built environment, policy and scaler.

    Every object the episode needs is constructed inside this call, and the
    policy session is closed before the function exits, whether the episode
    succeeds or raises.

    Args:
        arg: indexable sequence with at least five items, used by position:
            arg[0]: first argument to both Policy and Scaler
                (presumably the observation dimension -- TODO confirm)
            arg[1], arg[2]: second and third arguments to Policy
            arg[3]: last argument to run_episode
            arg[4]: first argument to gym.Init, fourth argument to Policy
                and second argument to Scaler (presumably the environment
                name, as in init_gym -- TODO confirm)

    Returns:
        dict with keys 'observes', 'actions', 'rewards' and 'unscaled_obs'
        holding the four values returned by run_episode, in that order.
    """
    # Second argument is the render flag in init_gym; rendering is off here.
    env_c = gym.Init(arg[4], False)
    policy = Policy(arg[0], arg[1], arg[2], arg[4], True)
    try:
        scaler = Scaler(arg[0], arg[4])
        scaler.resume()
        observes, actions, rewards, unscaled_obs = run_episode(
            env_c, policy, scaler, arg[3])
    finally:
        # Always release the policy session, even if setup or the episode
        # itself fails; otherwise the session would be leaked.
        policy.close_sess()

    return {
        'observes': observes,
        'actions': actions,
        'rewards': rewards,
        'unscaled_obs': unscaled_obs,
    }
def init_gym(env_name, render):
    """
    Build an environment and report the sizes of its spaces.

    Args:
        env_name: forwarded to gym.Init as its first argument
        render: forwarded to gym.Init as its second argument;
            True to toggle on visualization

    Returns: 3-tuple
        environment (object returned by gym.Init)
        number of observation dimensions (observation_space.shape[0])
        number of action dimensions (action_space.shape[0])
    """
    environment = gym.Init(env_name, render)
    # Only the leading axis of each space's shape is reported.
    return (
        environment,
        environment.observation_space.shape[0],
        environment.action_space.shape[0],
    )