Example #1
def make_vec_envs(env_name,
                  seed,
                  num_processes,
                  gamma,
                  log_dir,
                  add_timestep,
                  device,
                  allow_early_resets,
                  num_frame_stack=None,
                  args=None):

    if 'golmultienv' in env_name.lower():
        num_processes = 1  # the real process count is carried in args so all environments can run through a single network
    envs = [
        make_env(env_name,
                 seed,
                 i,
                 log_dir,
                 add_timestep,
                 allow_early_resets,
                 map_width=args.map_width,
                 render_gui=args.render,
                 print_map=args.print_map,
                 noreward=args.no_reward,
                 max_step=args.max_step,
                 args=args) for i in range(num_processes)
    ]

    if 'golmultienv' in env_name.lower():
        return envs[0]()

    if len(envs) > 1:
        envs = SubprocVecEnv(envs)
    else:
        envs = DummyVecEnv(envs)

    if len(envs.observation_space.shape) == 1:
        if gamma is None:
            envs = VecNormalize(envs, ret=False)
        else:
            envs = VecNormalize(envs, gamma=gamma)

    envs = VecPyTorch(envs, device)

    if num_frame_stack is not None:
        print('stacking {} frames'.format(num_frame_stack))
        envs = VecPyTorchFrameStack(envs, num_frame_stack, device)
    elif len(envs.observation_space.shape) == 3:
        envs = VecPyTorchFrameStack(envs, 1, device)

    return envs
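A minimal usage sketch for this factory, assuming an argparse-style namespace that carries the custom fields the function reads (map_width, render, print_map, no_reward, max_step), and that the vec-env wrappers used above (SubprocVecEnv, DummyVecEnv, VecNormalize from baselines, plus the project's VecPyTorch wrappers) are importable in the same module; the environment id, seed, and log directory below are placeholders, not values from the original project.

import argparse
import torch

# Hypothetical call; 'MyEnv-v0', the log dir, and the args values are stand-ins.
args = argparse.Namespace(map_width=16, render=False, print_map=False,
                          no_reward=False, max_step=200)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
envs = make_vec_envs('MyEnv-v0', seed=0, num_processes=8, gamma=0.99,
                     log_dir='/tmp/gym', add_timestep=False, device=device,
                     allow_early_resets=False, num_frame_stack=4, args=args)
obs = envs.reset()  # observations come back as torch tensors via the VecPyTorch wrapper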
def view_policy_ddpg():
    env = DummyVecEnv([lambda: EnvHandler(make_env())])
    view(env,
         seed=None,
         total_timesteps=10000,
         reward_scale=1.0,
         render=True,
         render_eval=False,
         noise_type=None,
         normalize_returns=False,
         normalize_observations=False,
         critic_l2_reg=1e-2,
         actor_lr=1e-4,
         critic_lr=1e-3,
         popart=False,
         gamma=0.99,
         clip_norm=None,
         nb_train_steps=50,
         nb_eval_steps=100,
         nb_save_epochs=None,
         batch_size=64,
         tau=0.01,
         action_range=(-250.0, 250.0),
         observation_range=(-5.0, 5.0),
         eval_env=None,
         load_path="./checkpoints/00007",
         save_dir=None,
         param_noise_adaption_interval=50)
def view_policy_ppo():
    env = DummyVecEnv([lambda: EnvHandler(make_env())])
    view(env=env,
         episodes=100,
         total_timesteps=1000000,
         nsteps=200,
         nminibatches=1,
         cliprange=0.2,
         ent_coef=0.0,
         lam=0.95,
         gamma=0.99,
         noptepochs=4,
         save_interval=100,
         save_dir=".",
         load_path="./checkpoints/00500",
         normalize_observations=False,
         normalize_returns=False)
def make_vec_envs(env_name, seed, num_processes, gamma, log_dir, add_timestep,
                  device, allow_early_resets):
    envs = [
        make_env(env_name, seed, i, log_dir, add_timestep, allow_early_resets)
        for i in range(num_processes)
    ]

    if len(envs) > 1:
        envs = SubprocVecEnv(envs)
    else:
        envs = DummyVecEnv(envs)

    envs = VecPyTorch(envs, device)
    # Frame-stacking wrapper (disabled in this variant):
    # if len(envs.observation_space.shape) == 3:
    #     print('Creating frame stacking wrapper')
    #     envs = VecPyTorchFrameStack(envs, 4, device)
    #     print(envs.observation_space)

    return envs
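A short rollout sketch for this simpler variant, assuming a discrete-action Gym environment and the (assumed) VecPyTorch convention of passing actions as a (num_processes, 1) tensor; the env id 'CartPole-v1' and the log directory are stand-ins, and the random action sampling is purely illustrative.

import torch

# Hypothetical rollout; env id and log dir are placeholders.
device = torch.device('cpu')
envs = make_vec_envs('CartPole-v1', seed=1, num_processes=4, gamma=0.99,
                     log_dir='/tmp/gym', add_timestep=False, device=device,
                     allow_early_resets=False)
obs = envs.reset()
for _ in range(10):
    # actions as a (num_processes, 1) tensor (assumed VecPyTorch convention for discrete spaces)
    actions = torch.tensor([[envs.action_space.sample()] for _ in range(4)])
    obs, reward, done, infos = envs.step(actions)
envs.close()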
Example #5
            else:
                action = 1
            obs, reward, done, _ = env.step([action])
            episode_reward += reward
            if render:
                env.render()
            if done:
                break
        eval_reward.append(episode_reward)
    return np.mean(eval_reward)


if __name__ == '__main__':
    # Create and wrap the environment
    env = gym.make('game-stock-exchange-continuous-v0')
    env = DummyVecEnv([lambda: env])
    action_dim = 2
    obs_shape = env.observation_space.shape
    rpm = ReplayMemory(MEMORY_SIZE)

    model = Model(act_dim=action_dim)
    algorithm = DQN(model, act_dim=action_dim, gamma=GAMMA, lr=LEARNING_RATE)

    agent = Agent(algorithm, obs_shape[0], obs_shape[1], action_dim)

    # Warm up the replay memory before training starts
    while len(rpm) < MEMORY_WARMUP_SIZE:
        run_episode(env, agent, rpm)

    max_episode = 2000
    episode = 0
    while episode < max_episode: