def build_ddpg_model(env, actor_hidden_layers, critic_hidden_layers, gamma, learning_rate):
    actor = build_actor(env, actor_hidden_layers)
    critic, action_input = build_critic(env, critic_hidden_layers)
    nb_actions = env.action_space.shape[0]
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.2, mu=0.1, sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=gamma,
                      target_model_update=1e-3)
    agent.compile(Adam(lr=learning_rate, clipnorm=1.), metrics=['mae'])
    return agent
def get_agent(drlm):
    print('testing', '.' * 60)
    actor = drlm.actor
    critic = drlm.critic
    nb_actions = drlm.nb_actions
    action_input = drlm.action_input
    processor = TorcsProcessor()
    # load weights
    print('loading weights ', load_weights, '.' * 60)
    if load_weights:
        actor.load_weights(alw)
        critic.load_weights(clw)
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.5, mu=0., sigma=.3)
    # random_process = ExplorationNoise(nb_steps=epochs,
    #                                   epsilon=1.0,
    #                                   steer=OrnsteinUhlenbeckProcess(theta=0.6, mu=0, sigma=0.3),
    #                                   accel_brake=OrnsteinUhlenbeckProcess(theta=1.0, mu=0.5, sigma=0.3),
    #                                   noise=1)
    agent = DDPGAgent(nb_actions=nb_actions, batch_size=batch_size, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory, processor=processor,
                      nb_steps_warmup_critic=nb_steps_warmup_critic,
                      nb_steps_warmup_actor=nb_steps_warmup_actor,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.0001, clipnorm=1.), metrics=[loss])
    return agent
def main(): # Get the environment and extract the number of actions. env = gym.make(ENV_NAME) env = wrappers.Monitor(env, '/tmp/{}'.format(ENV_NAME), force=True) np.random.seed(123) env.seed(123) assert len(env.action_space.shape) == 1 action_shape = env.action_space.shape[0] observation_shape = env.observation_space.shape actor = create_actor(observation_shape, action_shape) action_input = Input(shape=(action_shape,), name='action_input') observation_input = Input(shape=(1,) + observation_shape, name='observation_input') critic = create_critic(observation_input, action_input) memory = SequentialMemory(limit=100000, window_length=1) random_process = OrnsteinUhlenbeckProcess(size=action_shape, theta=.15, mu=0., sigma=.1) agent = DDPGAgent(nb_actions=action_shape, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process, gamma=.99, target_model_update=1e-3, processor=BipedalProcessor()) agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae']) agent.load_weights('ddpg_{}_weights.h5f'.format(ENV_NAME)) #agent.fit() agent.fit(env, nb_steps=3000000, visualize=False, verbose=2) agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
def __init__(self, env):
    self.nb_actions = env.action_space.shape[0]
    self.nb_states = env.observation_space.shape[0]
    self.env = env
    self.actor = self.build_actor(env)
    self.actor.compile('Adam', 'mse')
    self.critic, action_input = self.build_critic(env)
    self.loss = self.build_loss()
    self.processor = WhiteningNormalizerProcessor()
    self.memory = SequentialMemory(limit=5000000, window_length=1)
    self.random_process = OrnsteinUhlenbeckProcess(size=self.nb_actions, theta=0.75, mu=0.5, sigma=0.25)
    self.agent = DDPGAgent(nb_actions=self.nb_actions, actor=self.actor, critic=self.critic,
                           critic_action_input=action_input, memory=self.memory,
                           nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                           random_process=self.random_process, gamma=.99,
                           target_model_update=1e-3, processor=self.processor)
    self.agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=self.loss)
    self.sym_actor = self.build_sym_actor()
    self.sym_actor.compile(optimizer='Adam', loss='mse')
class RLAgent: def __init__(self, env): np.random.seed(123) env.seed(123) assert len(env.action_space.shape) == 1 nb_actions = env.action_space.shape[0] # Next, we build a very simple model. self.actor = Sequential() self.actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape)) self.actor.add(Dense(16)) self.actor.add(Activation('relu')) self.actor.add(Dense(16)) self.actor.add(Activation('relu')) self.actor.add(Dense(16)) self.actor.add(Activation('relu')) self.actor.add( Dense(nb_actions, activation='tanh', kernel_initializer=RandomUniform())) self.actor.add(Lambda(lambda x: x * 60.0)) print(self.actor.summary()) action_input = Input(shape=(nb_actions, ), name='action_input') observation_input = Input(shape=(1, ) + env.observation_space.shape, name='observation_input') flattened_observation = Flatten()(observation_input) x = Concatenate()([action_input, flattened_observation]) x = Dense(32)(x) x = Activation('relu')(x) x = Dense(32)(x) x = Activation('relu')(x) x = Dense(32)(x) x = Activation('relu')(x) x = Dense(1)(x) x = Activation('linear')(x) critic = Model(inputs=[action_input, observation_input], outputs=x) print(critic.summary()) # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and # even the metrics! memory = SequentialMemory(limit=100000, window_length=1) random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3) self.agent = DDPGAgent(nb_actions=nb_actions, actor=self.actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process, gamma=.99, target_model_update=1e-3) self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
def __init__(self, observation_space, action_space, filename='KerasDDPGAgent.h5f'): nb_actions = action_space.shape[0] # Actor network actor = Sequential() actor.add(Flatten(input_shape=(1, ) + observation_space.shape)) actor.add(Dense(32)) actor.add(Activation('relu')) actor.add(Dense(32)) actor.add(Activation('relu')) actor.add(Dense(32)) actor.add(Activation('relu')) actor.add(Dense(nb_actions)) actor.add(Activation('sigmoid')) print(actor.summary()) # Critic network action_input = Input(shape=(nb_actions, ), name='action_input') observation_input = Input(shape=(1, ) + observation_space.shape, name='observation_input') flattened_observation = Flatten()(observation_input) x = concatenate([action_input, flattened_observation]) x = Dense(64)(x) x = Activation('relu')(x) x = Dense(64)(x) x = Activation('relu')(x) x = Dense(64)(x) x = Activation('relu')(x) x = Dense(1)(x) x = Activation('linear')(x) critic = Model(inputs=[action_input, observation_input], outputs=x) print(critic.summary()) # Setup Keras RL's DDPGAgent memory = SequentialMemory(limit=100000, window_length=1) random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=nb_actions) self.agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process, gamma=.99, target_model_update=1e-3, delta_clip=1.) self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae']) self.filename = filename
def buildAgent(env): nb_actions = env.action_space.shape[0] # Create networks for DDPG # Next, we build a very simple model. actor = Sequential() print((-1, env.observation_space.shape[0])) actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape)) actor.add(Dense(32)) actor.add(Activation('tanh')) actor.add(Dense(32)) actor.add(Activation('tanh')) actor.add(Dense(32)) actor.add(Activation('tanh')) actor.add(Dense(nb_actions)) actor.add(Activation('sigmoid')) print(actor.summary()) action_input = Input(shape=(nb_actions, ), name='action_input') observation_input = Input(shape=(1, ) + env.observation_space.shape, name='observation_input') flattened_observation = Flatten()(observation_input) x = concatenate([action_input, flattened_observation]) x = Dense(64)(x) x = Activation('tanh')(x) x = Dense(64)(x) x = Activation('tanh')(x) x = Dense(64)(x) x = Activation('tanh')(x) x = Dense(1)(x) x = Activation('linear')(x) critic = Model(inputs=[action_input, observation_input], outputs=x) print(critic.summary()) # Set up the agent for training memory = SequentialMemory(limit=100000, window_length=1) random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput) agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process, gamma=.99, target_model_update=1e-3, delta_clip=1.) # agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model, # memory=memory, nb_steps_warmup=1000, random_process=random_process, # gamma=.99, target_model_update=0.1) agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae']) return agent
def main(args): CUDA = torch.cuda.is_available() OUTPUT_RESULTS_DIR = './saver' ENVIRONMENT = 'SemisuperPendulumRandom-v0' TIMESTAMP = datetime.now().strftime("%Y%m%d-%H%M%S") SUMMARY_DIR = os.path.join(OUTPUT_RESULTS_DIR, "DDPG", ENVIRONMENT, TIMESTAMP) env = gym.make(ENVIRONMENT) env = wrappers.Monitor(env, SUMMARY_DIR, force=True) state_dim = env.observation_space.shape[0] action_dim = env.action_space.shape[0] action_bound = env.action_space.high actor = ActorNetwork(state_dim, action_dim, action_bound, args.actor_lr, args.tau, args.seed) target_actor = ActorNetwork(state_dim, action_dim, action_bound, args.actor_lr, args.tau, args.seed) critic = CriticNetwork(state_dim, action_dim, action_bound, args.critic_lr, args.tau, args.l2_decay, args.seed) target_critic = CriticNetwork(state_dim, action_dim, action_bound, args.critic_lr, args.tau, args.l2_decay, args.seed) if CUDA: actor = actor.cuda() target_actor = target_actor.cuda() critic = critic.cuda() target_critic = target_critic.cuda() replay_buffer = ReplayBuffer(args.bufferlength, args.seed) agent = DDPGAgent(actor, target_actor, critic, target_critic, replay_buffer, batch_size=args.batch_size, gamma=args.gamma, seed=args.seed, episode_len=args.episode_len, episode_steps=args.episode_steps, noise_mean=args.noise_mean, noise_th=args.noise_th, noise_std=args.noise_std, noise_decay=args.noise_decay) if args.is_train: agent.train(env) agent.save_actor_weights(save_dir=OUTPUT_RESULTS_DIR, filename=args.actor_weights) else: agent.load_actor_weights(save_dir=OUTPUT_RESULTS_DIR, filename=args.actor_weights) agent.test(env)
def create(env): np.random.seed(config.current.domain_seed) env.seed(config.current.domain_seed) nb_actions = env.action_space.n # Next, we build a very simple model. actor = Sequential() actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape)) actor.add(Dense(config.current.agent_vfn_complexity)) actor.add(Activation('relu')) actor.add(Dense(config.current.agent_vfn_complexity)) actor.add(Activation('relu')) actor.add(Dense(config.current.agent_vfn_complexity)) actor.add(Activation('relu')) actor.add(Dense(nb_actions)) actor.add(Activation('linear')) #print(actor.summary()) action_input = Input(shape=(nb_actions, ), name='action_input') observation_input = Input(shape=(1, ) + env.observation_space.shape, name='observation_input') flattened_observation = Flatten()(observation_input) x = Concatenate()([action_input, flattened_observation]) x = Dense(config.current.agent_vfn_complexity)(x) x = Activation('relu')(x) x = Dense(config.current.agent_vfn_complexity)(x) x = Activation('relu')(x) x = Dense(config.current.agent_vfn_complexity)(x) x = Activation('relu')(x) x = Dense(1)(x) x = Activation('linear')(x) critic = Model(inputs=[action_input, observation_input], outputs=x) #print(critic.summary()) # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and # even the metrics! memory = SequentialMemory(limit=100000, window_length=1) random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3) agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=100, random_process=random_process, gamma=.99, target_model_update=1e-3, processor=ArgmaxProcessor()) agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae']) return agent
def build_agent(env): assert len(env.action_space.shape) == 1 nb_actions = env.action_space.shape[0] print(env.observation_space.shape) # Actor model actor = Sequential() actor.add( Flatten(input_shape=(window_length, ) + env.observation_space.shape)) actor.add(Dense(128, activation="relu")) actor.add(Dense(128, activation="relu")) actor.add(Dense(64, activation="relu")) actor.add(Dense(nb_actions, activation="tanh")) actor.summary() # Critic model action_input = Input(shape=(nb_actions, ), name='action_input') observation_input = Input(shape=(window_length, ) + env.observation_space.shape, name='observation_input') flattened_observation = Flatten()(observation_input) x = Concatenate()([action_input, flattened_observation]) x = Dense(256, activation="relu")(x) x = Dense(256, activation="relu")(x) x = Dense(128, activation="relu")(x) x = Dense(1, activation="linear")(x) critic = Model(inputs=[action_input, observation_input], outputs=x) critic.summary() memory = SequentialMemory(limit=1000000, window_length=window_length) # Exploration policy - has a great effect on learning. Should encourage forward motion. # theta - how fast the process returns to the mean # mu - mean value - this should be greater than 0 to encourage forward motion # sigma - volatility of the process random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.5, mu=0.4, sigma=0.3) agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=50, nb_steps_warmup_actor=50, random_process=random_process, gamma=.99, target_model_update=1e-3) agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae']) return agent
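# Hedged sketch, not part of build_agent above: one way to sanity-check the exploration
# settings described in the comments (theta, mu, sigma) is to draw a few samples from the
# same OrnsteinUhlenbeckProcess and inspect their scale before training. nb_actions=3 is
# an assumed placeholder for the environment's action dimension.
from rl.random import OrnsteinUhlenbeckProcess

def preview_exploration_noise(nb_actions=3, n_samples=5):
    noise = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.5, mu=0.4, sigma=0.3)
    # Each call to sample() advances the process one step and returns the noise
    # vector that would be added to the actor's action at that step.
    return [noise.sample() for _ in range(n_samples)]

for step, sample in enumerate(preview_exploration_noise()):
    print(step, sample)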
def configure(self, observation_space_shape, nb_actions): # Next, we build a simple model. # actor network actor = Sequential() actor.add(Flatten(input_shape=(1, ) + observation_space_shape)) actor.add(Dense(16)) actor.add(Activation('relu')) actor.add(Dense(16)) actor.add(Activation('relu')) actor.add(Dense(16)) actor.add(Activation('relu')) actor.add(Dense(nb_actions)) actor.add(Activation('linear')) print(actor.summary()) # critic network action_input = Input(shape=(nb_actions, ), name='action_input') observation_input = Input(shape=(1, ) + observation_space_shape, name='observation_input') flattened_observation = Flatten()(observation_input) x = concatenate([action_input, flattened_observation]) x = Dense(32)(x) x = Activation('relu')(x) x = Dense(32)(x) x = Activation('relu')(x) x = Dense(32)(x) x = Activation('relu')(x) x = Dense(1)(x) x = Activation('linear')(x) critic = Model(input=[action_input, observation_input], output=x) print(critic.summary()) # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and # even the metrics! memory = SequentialMemory(limit=100000, window_length=1) random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3) self.agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process, gamma=.99, target_model_update=1e-3) self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
def visualize(session_name):
    kwargs = {'viewer': True}
    ENV_NAME = 'singlePendulum-v0'
    env = gym.make(ENV_NAME, **kwargs)
    np.random.seed(7)
    env.seed(7)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]
    actor, critic, action_input = create_networks(env)
    memory = SequentialMemory(limit=400, window_length=1)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory)
    agent.compile(Adam(lr=.0005, clipnorm=1., epsilon=1.e-7, beta_1=0.9, beta_2=0.999),
                  metrics=['mae'])
    checkpoint_filepath = 'checkpoint/ddpg_{}_{}_weights.h5f'.format(ENV_NAME, session_name)
    filepath = 'ddpg_{}_{}_weights.h5f'.format(ENV_NAME, session_name)
    agent.load_weights(filepath=filepath)
    env.viewer = True
    agent.test(env, nb_episodes=1, visualize=False, nb_max_episode_steps=400)
    env.close()
def _build_ddpg(nb_actions, nb_states):
    # build an actor network
    actor = Sequential()
    actor.add(Flatten(input_shape=(1, nb_states)))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('sigmoid'))

    # build a critic network
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1, nb_states), name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    # tricks: replay memory and exploration noise
    memory = SequentialMemory(limit=10240, window_length=1)
    oup = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)

    # build ddpg agent
    ddpg = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                     critic_action_input=action_input, memory=memory,
                     nb_steps_warmup_actor=100, nb_steps_warmup_critic=100,
                     random_process=oup, gamma=.99, target_model_update=1e-3)
    ddpg.compile(Adam(), metrics=['mae'])
    return ddpg
def build_agent(num_action, observation_shape):
    actor = build_actor_model(num_action, observation_shape)
    critic, critic_action_input = build_critic_model(num_action, observation_shape)
    memory = SequentialMemory(limit=10**5, window_length=1)
    agent = DDPGAgent(num_action, actor, critic, critic_action_input, memory)
    return agent
def create_agent(observation_space, action_space, hyperparams): assert len(action_space.shape) == 1 nb_actions = action_space.shape[0] actor = Sequential() actor.add(Flatten(input_shape=(1, ) + observation_space.shape)) for layer in hyperparams['actor']['layers']: actor.add(Dense(layer['neurons'].value)) actor.add(Activation(layer['activation'].value)) actor.add(Dense(nb_actions)) actor.add(Activation(hyperparams['actor']['output_activation'].value)) action_input = Input(shape=(nb_actions, ), name='action_input') observation_input = Input(shape=(1, ) + observation_space.shape, name='observation_input') flattened_observation = Flatten()(observation_input) x = Concatenate()([action_input, flattened_observation]) for layer in hyperparams['critic']['layers']: x = Dense(layer['neurons'].value, activation=layer['activation'].value)(x) x = Dense(1, activation=hyperparams['critic']['output_activation'].value)(x) critic = Model(inputs=[action_input, observation_input], outputs=x) # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and # even the metrics! memory = SequentialMemory(limit=100000, window_length=1) random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3) agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process, gamma=.99, target_model_update=1e-3) agent.compile(optimizer=hyperparams['optimizer'].value, metrics=['mae']) return actor, agent
def get_agent(env) -> DDPGAgent: """ Generate a `DDPGAgent` instance that represents an agent learned using Deep Deterministic Policy Gradient. The agent has 2 neural networks: an actor network and a critic network. Args: * `env`: An OpenAI `gym.Env` instance. Returns: * a `DDPGAgent` instance. """ nb_actions = env.action_space.shape[0] action_input = Input(shape=(nb_actions,), name='action_input') observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input') actor = Sequential() actor.add(Flatten(input_shape=(1,) + env.observation_space.shape)) actor.add(Dense(16)) actor.add(Activation('relu')) actor.add(Dense(16)) actor.add(Activation('relu')) actor.add(Dense(16)) actor.add(Activation('relu')) actor.add(Dense(nb_actions)) actor.add(Activation('tanh')) flattened_observation = Flatten()(observation_input) x = Concatenate()([action_input, flattened_observation]) x = Dense(32)(x) x = Activation('relu')(x) x = Dense(32)(x) x = Activation('relu')(x) x = Dense(32)(x) x = Activation('relu')(x) x = Dense(1)(x) x = Activation('linear')(x) critic = Model(inputs=[action_input, observation_input], outputs=x) memory = SequentialMemory(limit=100000, window_length=1) random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions) agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, gamma=.99, target_model_update=1e-3)#random_process=random_process, agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae']) return agent
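# Hedged usage sketch for get_agent above: a continuous-action gym environment is
# assumed; 'Pendulum-v0' and the step/episode counts are illustrative placeholders
# rather than values taken from the original code.
import gym

def train_and_evaluate():
    env = gym.make('Pendulum-v0')
    agent = get_agent(env)  # get_agent already compiles the DDPGAgent
    agent.fit(env, nb_steps=50000, visualize=False, verbose=1)
    agent.test(env, nb_episodes=5, visualize=False)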
class DDPG(): def __init__(self, Env): self.env = Env nb_actions = self.env.action_space.shape[0] actor = Sequential() actor.add(Flatten(input_shape=(1,) + self.env.observation_space.shape)) actor.add(Dense(5)) actor.add(Activation('relu')) actor.add(Dense(8)) actor.add(Activation('relu')) actor.add(Dense(5)) actor.add(Activation('relu')) # actor.add(Dense(16)) # actor.add(Activation('relu')) actor.add(Dense(nb_actions)) actor.add(Activation('softmax')) # print(actor.summary()) action_input = Input(shape=(nb_actions,), name='action_input') observation_input = Input(shape=(1,) + Env.observation_space.shape, name='observation_input') flattened_observation = Flatten()(observation_input) x = concatenate([action_input, flattened_observation], name = 'concatenate') x = Dense(5)(x) x = Activation('relu')(x) x = Dense(8)(x) x = Activation('relu')(x) x = Dense(5)(x) x = Activation('relu')(x) # x = Dense(32)(x) # x = Activation('relu')(x) x = Dense(1)(x) x = Activation('linear')(x) critic = Model(inputs=[action_input, observation_input], outputs=x) # print(critic.summary()) memory = SequentialMemory(limit=100000, window_length=1) # random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3) random_process = None self.agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=32, nb_steps_warmup_actor=32, random_process=random_process, gamma=0, target_model_update=0.001) self.agent.processor = ShowActionProcessor(self.agent, self.env) self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae']) def fit(self): history = self.agent.fit(self.env, action_repetition=1, nb_steps=20000, visualize=False, verbose=1, nb_max_episode_steps=10) return history def save_weights(self): self.agent.save_weights('./store/ddpg_{}_weights2.h5f'.format("porfolio"), overwrite=True) def test(self): history = self.agent.test(self.env, nb_episodes=1, visualize=False, nb_max_episode_steps=10) return history def load_weights(self): self.agent.load_weights('./store/ddpg_{}_weights2.h5f'.format("porfolio"))
def build_agent(nS, nA, action_min, action_max):
    actor = build_actor(nS, nA, action_min, action_max)
    action_input, critic = build_critic(nS, nA)
    agent = DDPGAgent(nb_actions=nA, actor=actor, critic=critic,
                      critic_action_input=action_input,
                      memory=SequentialMemory(limit=100000, window_length=1),
                      nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=OrnsteinUhlenbeckProcess(size=nA, theta=0.15, sigma=0.3),
                      gamma=0.99, target_model_update=0.001)
    agent.compile(Adam(lr=0.001, clipnorm=1.), metrics=['mae'])
    return agent
def compile_agent(self):
    # Finally, we configure and compile our agent. You can use every built-in
    # Keras optimizer and even the metrics!
    processor = DistopiaProcessor(self.num_blocks, self.num_actions)
    memory = SequentialMemory(limit=50000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=self.nb_actions, theta=.15, mu=0., sigma=.3)
    self.agent = DDPGAgent(nb_actions=self.nb_actions, actor=self.actor, critic=self.critic,
                           critic_action_input=self.action_input, memory=memory,
                           nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                           random_process=random_process, gamma=.99, target_model_update=1e-3)
    self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
def create_actor_critic_agent():
    # create ddpg agent
    memory = SequentialMemory(limit=1000000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.5, mu=0., sigma=.1)
    agent = DDPGAgent(nb_actions=nb_actions, actor=create_actor(), critic=create_critic(),
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.8, target_model_update=1e-3,
                      processor=UnitsProcessor(n_sonars=env.robot.n_sonars))
    agent.compile(Adam(lr=.001), metrics=['mse'])
    return agent
def __init__(self, env): # rospack = rospkg.RosPack() # self.working_dir = rospack.get_path('neuroracer_gym_rl') # self.weight_backup = os.path.join(self.working_dir, "neuroracer.h5") self.env = env self.observation_space = self.env.observation_space self.action_space = self.env.action_space self.nb_actions = self.env.action_space.shape[0] self.batch_size = 16 self.max_buffer = 100000 self.window_length = 16 self.memory = SequentialMemory(limit=self.max_buffer, window_length=self.window_length) self.learning_rate_actor = 0.0001 self.learning_rate_critic = 0.001 self.gamma = 0.9 self.exploration_rate = 0.95 self.exploration_min = 0.01 self.exploration_decay = 0.995 random_process = OrnsteinUhlenbeckProcess(size=self.nb_actions, theta=.15, mu=0., sigma=.2) actor = self._create_actor() critic, critic_action_input = self._create_critic(self.nb_actions) self.model = DDPGAgent(nb_actions=self.nb_actions, actor=actor, critic=critic, critic_action_input=critic_action_input, memory=self.memory, nb_steps_warmup_critic=500, nb_steps_warmup_actor=500, random_process=random_process, gamma=self.gamma, target_model_update=.001, # processor=self.processor, batch_size=self.batch_size) self.model.compile( (Adam(lr=self.learning_rate_actor, clipnorm=1.), Adam(lr=self.learning_rate_critic, clipnorm=1.)), metrics=['mse'])
def test_ddpg(): # TODO: replace this with a simpler environment where we can actually test if it finds a solution env = gym.make('Pendulum-v0') np.random.seed(123) env.seed(123) random.seed(123) nb_actions = env.action_space.shape[0] actor = Sequential() actor.add(Flatten(input_shape=(1,) + env.observation_space.shape)) actor.add(Dense(16)) actor.add(Activation('relu')) actor.add(Dense(nb_actions)) actor.add(Activation('linear')) action_input = Input(shape=(nb_actions,), name='action_input') observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input') flattened_observation = Flatten()(observation_input) x = Concatenate()([action_input, flattened_observation]) x = Dense(16)(x) x = Activation('relu')(x) x = Dense(1)(x) x = Activation('linear')(x) critic = Model(inputs=[action_input, observation_input], outputs=x) memory = SequentialMemory(limit=1000, window_length=1) random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3) agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=50, nb_steps_warmup_actor=50, random_process=random_process, gamma=.99, target_model_update=1e-3) agent.compile([Adam(lr=1e-3), Adam(lr=1e-3)]) agent.fit(env, nb_steps=400, visualize=False, verbose=0, nb_max_episode_steps=100) h = agent.test(env, nb_episodes=2, visualize=False, nb_max_episode_steps=100)
def keras_rl(env, model_name, saved_model_name="model", steps=50000, test_steps=5, visualize=False, hidden_layers=3, critic_hidden_layers=3): nb_actions = 0 if (model_name == "DQN" or model_name == "SARSA"): nb_actions = env.action_space.n elif (model_name == "DDPG"): nb_actions = env.action_space.shape[0] model_structure = define_layers(env, nb_actions, num_of_hidden_layers=hidden_layers) memory = define_memory() policy = define_policy(model_name) if (model_name == "DQN"): model = DQNAgent(model=model_structure, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100, enable_double_dqn=True, dueling_type='avg', target_model_update=1e-2) elif (model_name == "SARSA"): model = SARSAAgent(model=model_structure, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy) elif (model_name == "DDPG"): action_input, critic_layers = define_critic_layers( env, num_of_hidden_layers=critic_hidden_layers) random_process = define_random_process(nb_actions) model = DDPGAgent(nb_actions=nb_actions, actor=model_structure, critic=critic_layers, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process, gamma=.99, target_model_update=1e-3) model.compile(Adam(lr=1e-3), metrics=['mae']) model.fit(env, nb_steps=steps, visualize=False, verbose=2) model.save_weights('{}.h5f'.format(model_name), overwrite=True) model.test(env, nb_episodes=test_steps, visualize=visualize)
class KerasDDPGAgent(KerasAgent):
    """
    A DDPG agent using Keras with Keras-RL.

    For more details about the Deep Deterministic Policy Gradient algorithm, see
    "Continuous control with deep reinforcement learning" by Lillicrap et al.
    https://arxiv.org/abs/1509.02971
    """

    def __init__(self, observation_space, action_space, filename='KerasDDPGAgent.h5f'):
        #from keras.layers.normalization import BatchNormalization
        nb_actions = action_space.shape[0]

        # Actor network
        actor = Sequential()
        actor.add(Flatten(input_shape=(1,) + observation_space.shape))
        actor.add(Dense(32, activation='selu'))
        actor.add(Dense(32, activation='selu'))
        actor.add(Dense(32, activation='selu'))
        actor.add(Dense(nb_actions, activation='sigmoid'))
        print(actor.summary())

        # Critic network
        action_input = Input(shape=(nb_actions,), name='action_input')
        observation_input = Input(shape=(1,) + observation_space.shape, name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = concatenate([action_input, flattened_observation])
        x = Dense(64)(x)
        x = Activation('tanh')(x)
        x = Dense(64)(x)
        x = Activation('tanh')(x)
        x = Dense(64)(x)
        x = Activation('tanh')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        print(critic.summary())

        # Setup Keras-RL's DDPGAgent
        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(theta=.1, mu=0., sigma=.2, dt=1e-2,
                                                  size=nb_actions, sigma_min=.05,
                                                  n_steps_annealing=1e6)
        self.agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                               critic_action_input=action_input, memory=memory,
                               nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                               random_process=random_process, gamma=.96,
                               target_model_update=1e-4, delta_clip=1.)
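# Hedged usage sketch, not part of the original KerasDDPGAgent above: the __init__ builds
# and stores the DDPGAgent in self.agent but (as shown) does not compile it, so a typical
# keras-rl workflow would compile, train, and persist it afterwards. The `env`, `nb_steps`,
# and `filepath` values here are illustrative placeholders.
from keras.optimizers import Adam

def train_keras_ddpg(wrapper, env, nb_steps=100000, filepath='KerasDDPGAgent.h5f'):
    wrapper.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    wrapper.agent.fit(env, nb_steps=nb_steps, visualize=False, verbose=1)
    wrapper.agent.save_weights(filepath, overwrite=True)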
def create_ddpg_agent(env):
    nb_actions = env.action_space.n
    policy = BoltzmannQPolicy()

    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + (env.observation_space.shape[1],)))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + (env.observation_space.shape[1],), name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    # Finally, we configure and compile our agent. You can use every built-in
    # Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    return agent
def build_agent(num_action, observation_shape):
    actor = build_actor_model(num_action, observation_shape)
    critic, critic_action_input = build_critic_model(num_action, observation_shape)
    memory = SequentialMemory(limit=10**6, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=num_action, theta=0.15, mu=0, sigma=0.3)
    agent = DDPGAgent(
        num_action,
        actor,
        critic,
        critic_action_input,
        memory,
        random_process=random_process
    )
    return agent
def _make_agent(self):
    """Internal helper function to create an actor-critic custom agent model"""
    if self.actor is None:
        self._make_actor()
    if self.critic is None:
        self._make_critic()
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=self.n_actions, theta=.15, mu=0., sigma=.3)
    self.agent = DDPGAgent(nb_actions=self.n_actions, actor=self.actor, critic=self.critic,
                           critic_action_input=self.action_input, memory=memory,
                           nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                           random_process=random_process, gamma=.99, target_model_update=1e-3)
    self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
class actor_critic(keras_model): def __init__(self,shared_object): log_info('building actor-critic specific parameters ') self.gamma = shared_object.get('gamma',.99) self.nb_steps_warmup_critic = shared_object.get('nb_steps_warmup_critic',100) self.nb_steps_warmup_actor = shared_object.get('nb_steps_warmup_actor',100) self.target_model_update = shared_object.get('target_model_update',1.0e-3) self.delta_clip = shared_object.get('delta_clip',1.) log_info('loading actor critic specific parameters is done') super(self.__class__,self).__init__(shared_object) def build_agent(self): ### Building Agent log_info("building DDPGAgent ...") self.agent = DDPGAgent(nb_actions=self.nb_actions, actor=self.networks.actor, critic=self.networks.critic, critic_action_input=self.networks.action_input, memory=self.networks.memory, nb_steps_warmup_critic=self.nb_steps_warmup_critic, nb_steps_warmup_actor=self.nb_steps_warmup_actor, random_process=self.networks.random_process, gamma=self.gamma, target_model_update=self.target_model_update, delta_clip=self.delta_clip) # self.agent = DDPGAgent(nb_actions=self.nb_actions, actor=self.networks.actor, critic=self.networks.critic, critic_action_input=self.networks.action_input, # memory=self.networks.memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, # random_process=self.networks.random_process, gamma=.99, target_model_update=1e-3, # delta_clip=1.) log_info("Adding optimizer ...") self.agent.compile(self.optimizer, self.metrics) log_info("Optimizer added.")
x = Dense(64)(x) x = Activation('relu')(x) x = Dense(64)(x) x = Activation('relu')(x) x = Dense(64)(x) x = Activation('relu')(x) x = Dense(1)(x) x = Activation('linear')(x) critic = Model(inputs=[action_input, observation_input], outputs=x) print(critic.summary()) # Set up the agent for training memory = SequentialMemory(limit=100000, window_length=1) random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput) agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process, gamma=.99, target_model_update=1e-3, delta_clip=1.) # agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model, # memory=memory, nb_steps_warmup=1000, random_process=random_process, # gamma=.99, target_model_update=0.1) agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae']) # Okay, now it's time to learn something! We visualize the training here for show, but this # slows down training quite a lot. You can always safely abort the training prematurely using # Ctrl + C. if args.train: agent.fit(env, nb_steps=nallsteps, visualize=False, verbose=1, nb_max_episode_steps=200, log_interval=10000) # After training is done, we save the final weights. agent.save_weights(args.model, overwrite=True) if not args.train:
x = Activation('relu')(x) # Output Layer x = Dense(1)(x) x = Activation('linear')(x) critic = Model(input=[action_input, observation_input], output=x) print(critic.summary()) # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and # even the metrics! memory = SequentialMemory(limit=2*NUM_STEPS, window_length=1) random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3) # random_process = OrnsteinUhlenbeckProcess(size=nb_actions, dt = env.tau, theta=1.0, mu=0.0, sigma=0.5, sigma_min=0.3, n_steps_annealing=NUM_STEPS) agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process, gamma=.999, target_model_update=1e-3, delta_clip=1.0) agent.compile(Adam(lr=3e-4, clipnorm=1.0), metrics=['mae']) # Optionally, we can reload a previous model's weights and continue training from there # Remove the _actor or _critic from the filename. The load method automatically # appends these. WEIGHTS_FILENAME = 'weights/ddpg_{}_weights.h5f'.format(ENV_NAME) # agent.load_weights(WEIGHTS_FILENAME) callbacks = []
x = (Dense(LAYER_SIZE))(x) x = Activation('relu')(x) # Output Layer x = Dense(1)(x) x = Activation('linear')(x) critic = Model(input=[action_input, observation_input], output=x) print(critic.summary()) # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and # even the metrics! memory = SequentialMemory(limit=2*NUM_STEPS, window_length=1) # random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3) random_process = OrnsteinUhlenbeckProcess(size=nb_actions, dt = env.tau, theta=0.6, mu=0.0, sigma=0.5, sigma_min=0.15, n_steps_annealing=NUM_STEPS) agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process, gamma=.999, target_model_update=1e-3, delta_clip=1.0) agent.compile(Adam(lr=.001, clipnorm=1.0), metrics=['mae']) # Load the model weights - this method will automatically load the weights for # both the actor and critic agent.load_weights(FILENAME) # Finally, evaluate our algorithm for 5 episodes. agent.test(env, nb_episodes=5, visualize=True,action_repetition=5) #nb_max_episode_steps=500,
x = Activation('relu')(x) # Output Layer x = Dense(1)(x) x = Activation('linear')(x) critic = Model(input=[action_input, observation_input], output=x) print(critic.summary()) # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and # even the metrics! memory = SequentialMemory(limit=2*NUM_STEPS, window_length=1) random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3) # random_process = OrnsteinUhlenbeckProcess(size=nb_actions, dt = env.tau, theta=1.0, mu=0.0, sigma=0.5, sigma_min=0.3, n_steps_annealing=NUM_STEPS) agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process, gamma=.999, target_model_update=1e-3, delta_clip=1.0) agent.compile(Adam(lr=.001, clipnorm=1.0), metrics=['mae']) # Optionally, we can reload a previous model's weights and continue training from there # Remove the _actor or _critic from the filename. The load method automatically # appends these. WEIGHTS_FILENAME = 'weights/ddpg_planar_crane_continuous-v0_weights.h5f' # agent.load_weights(WEIGHTS_FILENAME) callbacks = []
x = concatenate([action_input, flattened_observation]) x = Dense(32)(x) x = Activation('relu')(x) x = Dense(1)(x) x = Activation('linear')(x) critic = Model(inputs=[action_input, observation_input], outputs=x) print(critic.summary()) plot_model(critic, to_file='critic.png', show_shapes=True) # # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and # # even the metrics! memory = SequentialMemory(limit=10000, window_length=1) random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=0.15, mu=0., sigma=.3) agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process=random_process, gamma=.99, target_model_update=1e-3) agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae']) # # Okay, now it's time to learn something! We visualize the training here for show, but this # # slows down training quite a lot. You can always safely abort the training prematurely using # # Ctrl + C. agent.fit(env, nb_steps=25000, visualize=False, verbose=1, nb_max_episode_steps=200) # # After training is done, we save the final weights. agent.save_weights('ddpg_stokes_weights.h5f', overwrite=True) # # Finally, evaluate our algorithm for 5 episodes. agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.2, size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=100.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
# agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
if args.train:
    agent.fit(env, nb_steps=nallsteps, visualize=True, verbose=1,
              nb_max_episode_steps=env.timestep_limit, log_interval=10000)
    # After training is done, we save the final weights.
    agent.save_weights(args.output, overwrite=True)
x = Dense(400)(flattened_observation) x = Activation('relu')(x) x = Concatenate()([x, action_input]) x = Dense(300)(x) x = Activation('relu')(x) x = Dense(1)(x) x = Activation('linear')(x) critic = Model(inputs=[action_input, observation_input], outputs=x) print(critic.summary()) # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and # even the metrics! memory = SequentialMemory(limit=100000, window_length=1) random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1) agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000, random_process=random_process, gamma=.99, target_model_update=1e-3, processor=MujocoProcessor()) agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae']) # Okay, now it's time to learn something! We visualize the training here for show, but this # slows down training quite a lot. You can always safely abort the training prematurely using # Ctrl + C. agent.fit(env, nb_steps=1000000, visualize=False, verbose=1) # After training is done, we save the final weights. agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True) # Finally, evaluate our algorithm for 5 episodes. agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99, target_model_update=1e-3,
                  delta_clip=10.)
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=1000000, visualize=True, verbose=1, nb_max_episode_steps=200)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)