def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    # puts rewards and number of steps in info, before environment is wrapped
    monitored_env = bench.SimpleMonitor(env)
    env = wrap_dqn(monitored_env)
    return env, monitored_env
def main():
    if False:
        # the Deterministic-v4 variant uses a frame skip of 4 and no repeat-action probability
        environment = gym.make('BreakoutDeterministic-v4')
        environment = TerminateOnEndOfLifeWrapper(environment)
        environment = ReshapeWrapper(environment)
        environment = ClipRewardWrapper(environment)
        environment = RepeatWrapper(environment, frames=4)
    else:
        # use the environment wrappers found in openai baselines
        environment = gym.make('BreakoutNoFrameskip-v4')
        environment = wrap_dqn(environment)
        environment = DimShuffleWrapper(environment)

    # todo: perhaps these should be defined in the environment itself
    state_axes = ng.make_axes([
        ng.make_axis(environment.observation_space.shape[0], name='C'),
        ng.make_axis(environment.observation_space.shape[1], name='H'),
        ng.make_axis(environment.observation_space.shape[2], name='W'),
    ])

    agent = dqn.Agent(
        state_axes,
        environment.action_space,
        model=model,  # network definition, assumed to be defined elsewhere in the module
        epsilon=dqn.linear_generator(start=1.0, end=0.1, steps=1000000),
        gamma=0.99,
        learning_rate=0.00025,
        memory=dqn.Memory(maxlen=1000000),
        target_network_update_frequency=1000,
        learning_starts=10000,
    )

    rl_loop.rl_loop_train(environment, agent, episodes=200000)
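For reference, a minimal sketch of what an epsilon schedule like dqn.linear_generator above could look like; this is an illustrative assumption, not the library's actual implementation:

def linear_generator(start, end, steps):
    # illustrative sketch: yields values interpolated linearly from
    # `start` to `end` over `steps` calls, then `end` forever
    step_size = (end - start) / float(steps)
    value = start
    for _ in range(steps):
        yield value
        value += step_size
    while True:
        yield end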
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    model = distdeepq.models.cnn_to_dist_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=False)
    act = distdeepq.learn(
        env,
        p_dist_func=model,
        lr=1e-4,
        max_timesteps=2000000,
        # max_timesteps=100000,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=False,
        dist_params={'Vmin': -10, 'Vmax': 10, 'nb_atoms': 51})
    act.save("pong_model.pkl")
    env.close()
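The dist_params above are the standard C51 settings. A minimal sketch of the value-distribution support they imply (names here are illustrative, not distdeepq's internals):

import numpy as np

Vmin, Vmax, nb_atoms = -10, 10, 51
delta_z = (Vmax - Vmin) / (nb_atoms - 1)       # atom spacing: 0.4
atoms = Vmin + delta_z * np.arange(nb_atoms)   # support: -10.0, -9.6, ..., 10.0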
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    # puts rewards and number of steps in info, before environment is wrapped
    monitored_env = SimpleMonitor(env)
    # applies a bunch of modifications to simplify the observation space (downsample, make b/w)
    env = wrap_dqn(monitored_env)
    return env, monitored_env
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    # puts rewards and number of steps in info, before environment is wrapped
    monitored_env = SimpleMonitor(env)
    # applies a bunch of modifications to simplify the observation space (downsample, make b/w);
    # a frame skip of 4 is already performed here by baselines.common.atari_wrappers_deprecated
    env = wrap_dqn(monitored_env)
    return env, monitored_env
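For context, wrap_dqn from baselines.common.atari_wrappers_deprecated composes roughly the following wrapper stack. This is a paraphrase from memory of the deprecated module, not a verbatim copy; verify against the installed baselines version before relying on the exact order:

def wrap_dqn_sketch(env):
    # approximate composition of the deprecated wrap_dqn
    env = EpisodicLifeEnv(env)            # end episode on life loss
    env = NoopResetEnv(env, noop_max=30)  # random number of no-ops on reset
    env = MaxAndSkipEnv(env, skip=4)      # frame skip of 4, max over skipped frames
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)           # press FIRE to start, where required
    env = ProcessFrame84(env)             # downsample to 84x84 grayscale
    env = FrameStack(env, 4)              # stack the last 4 frames
    env = ClippedRewardsWrapper(env)      # clip rewards to {-1, 0, +1}
    return env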
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    act = DeepqWithGaze.load("pong_model.pkl")
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    act = deepq.load("pong_model.pkl")
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
def main():
    env = gym.make("BreakoutNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    agent = DQN(env)
    agent.update_target()
    episodes_rewards = [0] * 100
    avg_rewards = []
    skip_rewards = []
    step_num = 0
    for episode in range(EPISODE):
        goal = 0
        img_buf = deque()
        state = env.reset()
        while True:
            action = agent.egreedy_action(state)
            next_state, reward, done, _ = env.step(action)
            # env.render()
            # time.sleep(0.01)
            agent.perceive(state, action, reward, next_state, done, step_num)
            goal += reward
            step_num += 1
            state = next_state
            if done:
                episodes_rewards.pop(0)
                episodes_rewards.append(goal)
                break
        # print("Current reward:", goal, " Step number:", step_num)
        print("Episode: ", episode,
              " Last 100 episode average reward: ", np.average(episodes_rewards),
              " Total step number: ", step_num,
              " eps: ", agent.epsilon)
        if step_num > 2000000:
            break
        if episode % 50 == 0:
            skip_rewards.append(goal)
        if episode % 100 == 0:
            avg_rewards.append(np.average(episodes_rewards))
            out_file = open("avg_rewards.pkl", 'wb')
            out_file1 = open("skip_rewards.pkl", 'wb')
            pickle.dump(avg_rewards, out_file)
            pickle.dump(skip_rewards, out_file1)
            out_file.close()
            out_file1.close()
            agent.saver.save(agent.session, 'saved_networks/' + 'network' + '-dqn',
                             global_step=episode)
    env.close()
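The reward curves dumped above can be read back later, e.g. for plotting. A minimal sketch that assumes only the two files written by the loop above:

import pickle

with open("avg_rewards.pkl", 'rb') as f:
    avg_rewards = pickle.load(f)   # 100-episode average, recorded every 100 episodes
with open("skip_rewards.pkl", 'rb') as f:
    skip_rewards = pickle.load(f)  # single-episode reward, sampled every 50 episodes
print(len(avg_rewards), len(skip_rewards))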
def play():
    env = gym.make("BreakoutNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    agent = DQN(env)
    for episode in range(TEST):
        goal = 0
        step_num = 0
        state = env.reset()
        while True:
            action = agent.action(state)
            next_state, reward, done, _ = env.step(action)
            step_num += 1
            env.render()
            time.sleep(0.01)
            goal += reward
            state = next_state
            if done or step_num > MAX_STEP_PER_EPISODE:
                print("Episode: ", episode, " Total reward: ", goal)
                break
def make_and_wrap_env(game_name, seed):
    # when updating this to the non-deprecated wrappers, it is important to
    # copy over LazyFrames
    from baselines.common.atari_wrappers_deprecated import wrap_dqn
    env = gym.make(game_name + "NoFrameskip-v4")
    # puts rewards and number of steps in info, before environment is wrapped
    monitored_env = SimpleMonitor(env)
    # applies a bunch of modifications to simplify the observation space (downsample, make b/w)
    env = wrap_dqn(monitored_env)
    if seed > 0:
        set_global_seeds(seed)
        env.unwrapped.seed(seed)
    if gflag.gym_monitor and gflag.save_dir:
        env = gym.wrappers.Monitor(env, os.path.join(gflag.save_dir, 'gym_monitor'),
                                   force=True)
    return env, monitored_env
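The LazyFrames mentioned in the comment above is the memory-saving observation container from the deprecated wrappers. Roughly the following, paraphrased from memory; verify against the installed baselines source:

import numpy as np

class LazyFrames(object):
    # stores the stacked frames as a list and only concatenates them into a
    # single array when numpy actually needs one, so the replay buffer does
    # not hold duplicate copies of frames shared between observations
    def __init__(self, frames):
        self._frames = frames

    def __array__(self, dtype=None):
        out = np.concatenate(self._frames, axis=2)
        if dtype is not None:
            out = out.astype(dtype)
        return out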
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True)
    act = simple.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=200000,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=True,
        tf_log_dir='./log')
    act.save("pong_model.pkl")
    env.close()
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_dqn(env))
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True
    )
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=2000000,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=True
    )
    act.save("pong_model.pkl")
    env.close()
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env_monitored = SimpleMonitor(env)
    env = wrap_dqn(env_monitored)
    return env_monitored, env
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    # env = bench.Monitor(env, None)
    env = wrap_dqn(env)
    return env
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    # puts rewards and number of steps in info, before environment is wrapped
    monitored_env = SimpleMonitor(env)
    # applies a bunch of modifications to simplify the observation space (downsample, make b/w)
    env = wrap_dqn(monitored_env)
    return env, monitored_env
        default=True,
        help="whether or not to use stochastic actions according to the model's eps value")
    boolean_flag(parser, "dueling", default=False,
                 help="whether or not to use a dueling model")
    return parser.parse_args()


args = parse_args()
env = gym.make(args.env)
env = wrap_dqn(env)

epsilon = 0.35
MAX_EPISODES = 100000
BATCH = 32  # change to 1 while predicting
max_iter = 10000
ACTIONS = env.action_space.n
FACTORS = 2048
REPLAY_MEMORY = 1000000


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.01)
    return tf.Variable(initial)
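A matching bias initializer typically accompanies weight_variable in this style of TF1 code. This is an assumed companion for illustration, not part of the fragment above:

def bias_variable(shape):
    # small positive constant init, the usual companion to the
    # truncated-normal weight init above (hypothetical helper)
    initial = tf.constant(0.01, shape=shape)
    return tf.Variable(initial)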
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    env = SimpleMonitor(env)
    env = wrap_dqn(env)
    return env
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v3")
    env_monitored = SimpleMonitor(env)
    env = wrap_dqn(env_monitored)
    return env_monitored, env
def make_env(env_name, seed):
    env = gym.make(env_name)
    env.seed(seed)
    return wrap_dqn(env)
# import sys
# sys.path.append("../gym")
# import gym
import gym

from baselines import deepq
from baselines.common.atari_wrappers_deprecated import wrap_dqn, ScaledFloatFrame

# Create the Baselines game environment
# TRAIN
env = gym.make("PongNoFrameskip-v4")
env = ScaledFloatFrame(wrap_dqn(env))
model = deepq.models.cnn_to_mlp(
    convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
    hiddens=[256],
    dueling=True
)
act = deepq.learn(
    env,
    q_func=model,
    lr=1e-4,
    max_timesteps=2000000,
    buffer_size=10000,
    exploration_fraction=0.1,
    exploration_final_eps=0.01,
    train_freq=4,
    learning_starts=10000,
    target_network_update_freq=1000,
    gamma=0.99,
    prioritized_replay=True
)
act.save("pong_model.pkl")
env.close()