def make_vec_envs(env_name, seed, num_processes, gamma, log_dir, add_timestep,
                  device, allow_early_resets, num_frame_stack=None, args=None):
    if 'golmultienv' in env_name.lower():
        # Smuggle the real num_proc in via `args` so we can run them as one NN.
        num_processes = 1
    envs = [
        make_env(env_name, seed, i, log_dir, add_timestep, allow_early_resets,
                 map_width=args.map_width, render_gui=args.render,
                 print_map=args.print_map, noreward=args.no_reward,
                 max_step=args.max_step, args=args)
        for i in range(num_processes)
    ]
    if 'golmultienv' in env_name.lower():
        # The multi-env batches its own copies internally; return the single instance.
        return envs[0]()

    if len(envs) > 1:
        print(envs)
        envs = SubprocVecEnv(envs)
    else:
        # DummyVecEnv takes the list of env thunks directly.
        envs = DummyVecEnv(envs)

    if len(envs.observation_space.shape) == 1:
        if gamma is None:
            envs = VecNormalize(envs, ret=False)
        else:
            envs = VecNormalize(envs, gamma=gamma)

    envs = VecPyTorch(envs, device)

    if num_frame_stack is not None:
        print('stacking {} frames'.format(num_frame_stack))
        envs = VecPyTorchFrameStack(envs, num_frame_stack, device)
    elif len(envs.observation_space.shape) == 3:
        envs = VecPyTorchFrameStack(envs, 1, device)

    return envs
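# Usage sketch (illustrative only, not part of the original file): how the wrapper
# above might be invoked from a training script. `get_args` and the attribute names
# on `args` are assumptions based on the argparse-style namespace the function
# already reads from (map_width, render, print_map, no_reward, max_step, ...).
def _example_make_vec_envs():
    import torch
    args = get_args()  # hypothetical argparse helper providing the fields above
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    envs = make_vec_envs(args.env_name, args.seed, args.num_processes, args.gamma,
                         args.log_dir, args.add_timestep, device,
                         allow_early_resets=False, num_frame_stack=None, args=args)
    obs = envs.reset()  # VecPyTorch returns observations as torch tensors on `device`
    return envs, obs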
def view_policy_ddpg():
    env = DummyVecEnv([lambda: EnvHandler(make_env())])
    view(env, seed=None, total_timesteps=10000, reward_scale=1.0,
         render=True, render_eval=False, noise_type=None,
         normalize_returns=False, normalize_observations=False,
         critic_l2_reg=1e-2, actor_lr=1e-4, critic_lr=1e-3, popart=False,
         gamma=0.99, clip_norm=None, nb_train_steps=50, nb_eval_steps=100,
         nb_save_epochs=None, batch_size=64, tau=0.01,
         action_range=(-250.0, 250.0), observation_range=(-5.0, 5.0),
         eval_env=None, load_path="./checkpoints/00007", save_dir=None,
         param_noise_adaption_interval=50)
def view_policy_ppo():
    env = DummyVecEnv([lambda: EnvHandler(make_env())])
    view(env=env, episodes=100, total_timesteps=1000000, nsteps=200,
         nminibatches=1, cliprange=0.2, ent_coef=0.0, lam=0.95, gamma=0.99,
         noptepochs=4, save_interval=100, save_dir=".",
         load_path="./checkpoints/00500",
         normalize_observations=False, normalize_returns=False)
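# Illustrative helper (not in the original file): both viewers construct their own
# single-process DummyVecEnv internally, so a caller only selects which checkpoint
# format to render. The function name and its argument are assumptions.
def view_policy(algo='ppo'):
    if algo == 'ddpg':
        view_policy_ddpg()
    elif algo == 'ppo':
        view_policy_ppo()
    else:
        raise ValueError('unknown algo: {}'.format(algo))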
def make_vec_envs(env_name, seed, num_processes, gamma, log_dir, add_timestep,
                  device, allow_early_resets):
    envs = [
        make_env(env_name, seed, i, log_dir, add_timestep, allow_early_resets)
        for i in range(num_processes)
    ]
    if len(envs) > 1:
        envs = SubprocVecEnv(envs)
    else:
        envs = DummyVecEnv(envs)
    envs = VecPyTorch(envs, device)
    '''
    if len(envs.observation_space.shape) == 3:
        print('Creating frame stacking wrapper')
        envs = VecPyTorchFrameStack(envs, 4, device)
        #print(envs.observation_space)
    '''
    return envs
        else:
            action = 1
        obs, reward, done, _ = env.step([action])
        episode_reward += reward
        if render:
            env.render()
        if done:
            break
    eval_reward.append(episode_reward)
    return np.mean(eval_reward)


if __name__ == '__main__':
    # Create and wrap the environment
    env = gym.make('game-stock-exchange-continuous-v0')
    env = DummyVecEnv([lambda: env])
    action_dim = 2
    obs_shape = env.observation_space.shape

    rpm = ReplayMemory(MEMORY_SIZE)
    model = Model(act_dim=action_dim)
    algorithm = DQN(model, act_dim=action_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = Agent(algorithm, obs_shape[0], obs_shape[1], action_dim)

    # Warm up the replay memory before learning starts
    while len(rpm) < MEMORY_WARMUP_SIZE:
        run_episode(env, agent, rpm)

    max_episode = 2000
    episode = 0
    while episode < max_episode: