def test_doubleqagent(self):
    env = gym.make('CartPole-v0')
    num_features = env.observation_space.shape[0]
    num_actions = env.action_space.n

    model = Sequential()
    model.add(Dense(16, input_dim=num_features, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(units=num_actions, activation='linear'))
    model.compile(loss='mse', optimizer=rmsprop(lr=1e-3))

    # Build an agent with a Boltzmann policy and check that it survives a pickle round trip.
    agent = DoubleDeepQAgent(env=env, model=model,
                             policy=BoltzmannPolicy(),
                             memory=PrioritizedMemory(maxlen=50000),
                             metrics=[EpisodeReturn(), RollingEpisodeReturn(), CumulativeReward(), EpisodeTime()],
                             gamma=0.99, max_steps_per_episode=500)

    import pickle
    s = agent.__getstate__()
    t0 = pickle.dumps(agent)
    t1 = pickle.loads(t0)

    # Repeat with an epsilon-greedy policy and a longer episode limit.
    agent = DoubleDeepQAgent(env=env, model=model,
                             policy=EpsilonGreedyPolicy(min=0.05, max=0.5, decay=0.999),
                             memory=PrioritizedMemory(maxlen=50000),
                             metrics=[EpisodeReturn(), RollingEpisodeReturn(), CumulativeReward(), EpisodeTime()],
                             gamma=0.99, max_steps_per_episode=1000)

    s = agent.__getstate__()
    t0 = pickle.dumps(agent)
    t1 = pickle.loads(t0)
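    # Possible round-trip check (a sketch, not part of the original test): loading the
    # pickled bytes back should at least yield another agent of the same class. Deeper
    # equality checks would depend on what __getstate__ includes, which isn't shown here.
    restored = pickle.loads(t0)
    assert isinstance(restored, DoubleDeepQAgent)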
def build_agent_with_shaping():
    env = gym.make('MountainCar-v0')
    num_features = DoubleDeepQAgent._get_space_size(env.observation_space)
    num_actions = DoubleDeepQAgent._get_space_size(env.action_space)

    model = Sequential()
    model.add(Dense(16, input_dim=num_features, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(units=num_actions, activation='linear'))
    model.compile(loss='mse', optimizer=adam())

    agent = DoubleDeepQAgent(name='AgentWithShaping', env=env, model=model,
                             policy=EpsilonGreedyPolicy(min=0.05, max=0.5, decay=0.999),
                             memory=PrioritizedMemory(maxlen=50000),
                             metrics=[EpisodeReturn(), RollingEpisodeReturn(), CumulativeReward(), EpisodeTime()],
                             gamma=0.99, max_steps_per_episode=1000)
    # Hook the shaping function into the agent's preprocessing callback.
    agent.preprocess_state = shape_reward
    return agent
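# shape_reward is referenced above but not defined in this snippet. Below is a minimal
# sketch of what a shaping function for MountainCar-v0 could look like; the signature
# (reward plus next observation) is an assumption, not the confirmed deeprl hook API,
# and it is redundant if the project already defines shape_reward elsewhere.
def shape_reward(reward, next_state):
    # Encourage building momentum: add a bonus proportional to |velocity|
    # (observation index 1 in MountainCar-v0).
    return reward + 10.0 * abs(next_state[1])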
def build_agent(name):
    env = gym.make('LunarLander-v2')
    num_features = DoubleDeepQAgent._get_space_size(env.observation_space)
    num_actions = DoubleDeepQAgent._get_space_size(env.action_space)

    model = Sequential([
        Dense(64, input_dim=num_features, activation='relu'),
        Dense(64, activation='relu'),
        Dense(units=num_actions, activation='linear')
    ])
    model.compile(loss='mse', optimizer='sgd')

    agent = DoubleDeepQAgent(name=name, env=env, model=model,
                             policy=EpsilonGreedyPolicy(min=0.05, max=0.5, decay=0.999),
                             memory=PrioritizedMemory(maxlen=50000),
                             metrics=[EpisodeReturn(), RollingEpisodeReturn(), CumulativeReward(), EpisodeTime()],
                             gamma=0.99, max_steps_per_episode=500)
    return agent
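# Usage sketch for the helper above; the train()/test() keyword arguments mirror the
# LunarLander script at the bottom of this file, and the agent name is only an example.
if __name__ == '__main__':
    lander_agent = build_agent('LunarLanderAgent')
    lander_agent.train(target_model_update=1e-3, max_episodes=500, render_every_n=50)
    lander_agent.test(num_episodes=10, render_every_n=1)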
set_seed(0)

env = gym.make('CartPole-v0')
num_features = env.observation_space.shape[0]
num_actions = env.action_space.n

model = Sequential()
model.add(Dense(16, input_dim=num_features, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(units=num_actions, activation='linear'))
model.compile(loss='mse', optimizer=rmsprop(lr=1e-3))
model.summary()

memory = PrioritizedMemory(maxlen=50000)
# Overridden below with a plain (uniform) replay memory of the same size.
from deeprl.memories import Memory
memory = Memory(maxlen=50000)

policy = BoltzmannPolicy()
agent = DoubleDeepQAgent(env=env, model=model, policy=policy, memory=memory,
                         gamma=0.99,
                         metrics=[EpisodeReturn(), RollingEpisodeReturn(), CumulativeReward(), EpisodeTime()])
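# Training/evaluation sketch for the CartPole agent above; the keyword arguments mirror
# the LunarLander script below and the values are only illustrative.
agent.train(target_model_update=1e-3, max_episodes=500, render_every_n=50)
agent.test(num_episodes=10, render_every_n=1)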
# Continuous-control setup: a deterministic actor plus a Q-value critic.
actor = Sequential([
    Dense(128, input_dim=num_features, activation='relu'),
    Dense(128, activation='relu'),
    Dense(units=num_actions, activation='tanh')
])
actor.compile(loss='mse', optimizer=rmsprop(lr=1e-4))  # optimizer=sgd(lr=1e-13) was also tried

# The critic scores (state, action) pairs: both inputs are concatenated and fed through
# two hidden layers to a single linear Q-value output.
critic_state_input = Input(shape=(num_features,), name='critic_state_input')
critic_action_input = Input(shape=(num_actions,), name='critic_action_input')
critic_merged_input = concatenate([critic_state_input, critic_action_input])
critic_h1 = Dense(128, activation='relu', name='critic_h1')(critic_merged_input)
critic_h2 = Dense(128, activation='relu', name='critic_h2')(critic_h1)
critic_out = Dense(1, activation='linear', name='CriticOut')(critic_h2)
critic = Model(inputs=[critic_state_input, critic_action_input], outputs=[critic_out])
critic.compile(sgd(lr=1e-3, clipnorm=5.), 'mse')

memory = PrioritizedMemory(maxlen=int(1e6), sample_size=32)  # cast to int: deque-style buffers reject a float maxlen
agent = ActorCriticAgent(env=env, actor=actor, critic=critic, memory=memory,
                         policy=NoisyPolicy(0.15, 0.5, clip=env.action_space),
                         max_steps_per_episode=500, tb_path='tensorboard')
agent.train(max_episodes=10000, render_every_n=50, target_model_update=1e-4)
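# Evaluation sketch: assuming ActorCriticAgent exposes the same test() helper used by
# the DQN script below, the trained actor could be checked with:
agent.test(num_episodes=10, render_every_n=1)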
import deeprl.utils.metrics as metrics
import logging

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers[0].setLevel(logging.INFO)

# set_seed(0)
env = gym.make('LunarLander-v2')
num_features = env.observation_space.shape[0]
num_actions = env.action_space.n

model = Sequential([
    Dense(64, input_dim=num_features, activation='relu'),
    Dense(64, activation='relu'),
    Dense(units=num_actions, activation='linear')
])
model.compile(loss='mse', optimizer=rmsprop(lr=0.0016, decay=0.000001, clipnorm=1.))

memory = PrioritizedMemory(maxlen=50000, sample_size=32)
policy = BoltzmannPolicy()
agent = DoubleDeepQAgent(env=env, model=model, policy=policy, memory=memory,
                         gamma=0.99, max_steps_per_episode=500, tb_path='tensorboard')

agent.train(target_model_update=1e-3, max_episodes=500, render_every_n=50)
agent.test(num_episodes=10, render_every_n=1)