Ejemplo n.º 1
0
def run_parallel_episodes(arg):
    """
    Run a single episode with a freshly built environment, policy and
    scaler, and return the collected trajectory.

    Everything the episode needs is constructed inside the function from
    the one indexable argument `arg`.
    NOTE(review): presumably this is so the function can be mapped over a
    worker pool -- confirm against the caller.

    Args:
        arg: indexable with at least 5 entries, consumed as follows:
            arg[0]: 1st argument to Policy and 1st argument to Scaler
                (presumably the observation dimension -- TODO confirm)
            arg[1]: 2nd argument to Policy
                (presumably the action dimension -- TODO confirm)
            arg[2]: 3rd argument to Policy
            arg[3]: last argument to run_episode
            arg[4]: environment name, passed to gym.Init, Policy and Scaler

    Returns: dict with the four values returned by run_episode
        'observes', 'actions', 'rewards', 'unscaled_obs'
    """
    env_name = arg[4]
    env_c = gym.Init(env_name, False)
    policy = Policy(arg[0], arg[1], arg[2], env_name, True)
    try:
        scaler = Scaler(arg[0], env_name)
        scaler.resume()
        observes, actions, rewards, unscaled_obs = run_episode(
            env_c, policy, scaler, arg[3])
    finally:
        # Always release the policy, even when building the scaler or
        # running the episode raises; otherwise a failing worker would
        # never reach close_sess().
        policy.close_sess()

    return {
        'observes': observes,
        'actions': actions,
        'rewards': rewards,
        'unscaled_obs': unscaled_obs,
    }
def init_gym(env_name, render):
    """
    Build the environment and report the sizes of its observation and
    action spaces.

    Args:
        env_name: environment identifier, forwarded to gym.Init
        render: rendering flag, forwarded to gym.Init
            (True to toggle on visualization)

    Returns: 3-tuple
        environment (object)
        first entry of the observation space's shape
        first entry of the action space's shape
    """
    environment = gym.Init(env_name, render)

    # Tuple elements are evaluated left to right, so the observation
    # space is inspected before the action space.
    return (
        environment,
        environment.observation_space.shape[0],
        environment.action_space.shape[0],
    )