Example #1
def main(env_name, num_episodes, render, VideoSave, gamma, lam, kl_targ, batch_size):
    killer = GracefulKiller()
    env, obs_dim, act_dim = init_gym(env_name, render)
    obs_dim += 1  # add 1 to obs dimension for time step feature (see run_episode())
    now = datetime.utcnow().strftime("%b-%d_%H-%M-%S")  # create unique directories
    logger = Logger(logname=env_name, now=now)
    #aigym_path = os.path.join('/tmp', env_name, now)
    #env = wrappers.Monitor(env, aigym_path, force=True) 
    scaler = Scaler(obs_dim, env_name)
    scaler.resume()
    val_func = NNValueFunction(obs_dim, env_name)
    policy = Policy(obs_dim, act_dim, kl_targ, env_name)
    episode = 0
    capture = False
    while episode < num_episodes:
        if VideoSave and not capture:
            env.ScreenCapture(5)
            capture = True
        trajectories = run_policy(env, policy, scaler, logger, episodes=batch_size)
        episode += len(trajectories)

        if killer.kill_now:
            if input('Terminate training (y/[n])? ') == 'y':
                break
            killer.kill_now = False
    logger.close()
    policy.close_sess()
    val_func.close_sess()
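The training loop above polls killer.kill_now so the run can be stopped cleanly between batches. The GracefulKiller class itself is not shown on this page; below is a minimal sketch of the common signal-handler pattern it presumably follows (the class name and kill_now attribute match the calls above, but the body is an assumption, not the source's code):

import signal

class GracefulKiller:
    # Assumed implementation: trap SIGINT/SIGTERM and set a flag instead
    # of exiting immediately, so the loop above can ask whether to stop.
    kill_now = False

    def __init__(self):
        signal.signal(signal.SIGINT, self.exit_gracefully)
        signal.signal(signal.SIGTERM, self.exit_gracefully)

    def exit_gracefully(self, signum, frame):
        self.kill_now = True

Note that main() resets killer.kill_now to False when the user declines to terminate, which works because the handler only sets the flag and never raises.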
Example #2
def run_parallel_episodes(arg):
    # arg is a single packed tuple so this function can be mapped over
    # worker processes. Matching the Policy/Scaler signatures in Example #1:
    # arg[0] = obs_dim, arg[1] = act_dim, arg[2] = kl_targ, arg[4] = env_name;
    # arg[3] is forwarded to run_episode.
    total_steps = 0
    env_c = gym.Init(arg[4], False)
    policy = Policy(arg[0], arg[1], arg[2], arg[4], True)
    scaler = Scaler(arg[0], arg[4])
    scaler.resume()  # restore previously saved observation-scaling state
    observes, actions, rewards, unscaled_obs = run_episode(env_c, policy, scaler, arg[3])
    total_steps += observes.shape[0]  # accumulated step count; not returned in this snippet
    trajectory = {'observes': observes,
                  'actions': actions,
                  'rewards': rewards,
                  'unscaled_obs': unscaled_obs}
    policy.close_sess()
    return trajectory
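Because run_parallel_episodes takes one packed argument and builds its own environment, policy, and scaler per call, it is shaped for multiprocessing.Pool.map. Below is a hedged sketch of a driver; the helper name gather_trajectories is hypothetical, and the tuple layout (including the False in position 3, assumed to be a run_episode option such as a render flag) follows the indexing deduced above rather than anything confirmed by the source:

from multiprocessing import Pool

def gather_trajectories(obs_dim, act_dim, kl_targ, env_name, batch_size):
    # Hypothetical driver: run one episode per worker process and collect
    # the resulting trajectory dicts into a batch.
    args = [(obs_dim, act_dim, kl_targ, False, env_name)] * batch_size
    with Pool(processes=min(batch_size, 4)) as pool:
        trajectories = pool.map(run_parallel_episodes, args)
    return trajectories

Each worker opens and closes its own TensorFlow session (policy.close_sess()), which is why the policy is reconstructed per call instead of being shared across processes.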