import random

import numpy as np
import gym

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, concatenate
from keras.optimizers import Adam

from rl.agents import NAFAgent, DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess
from rl.processors import MultiInputProcessor


def test_single_continuous_dqn_input():
    nb_actions = 2

    # V(s): scalar state-value head.
    V_model = Sequential()
    V_model.add(Flatten(input_shape=(2, 3)))
    V_model.add(Dense(1))

    # mu(s): greedy action head.
    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(2, 3)))
    mu_model.add(Dense(nb_actions))

    # L(s, a): lower-triangular entries of the quadratic advantage term.
    L_input = Input(shape=(2, 3))
    L_input_action = Input(shape=(nb_actions,))
    x = concatenate([Flatten()(L_input), L_input_action])
    x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)
    L_model = Model(inputs=[L_input_action, L_input], outputs=x)

    memory = SequentialMemory(limit=10, window_length=2)
    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                     memory=memory, nb_steps_warmup=5, batch_size=4)
    agent.compile('sgd')
    # MultiInputTestEnv is a small helper environment defined elsewhere in the test suite.
    agent.fit(MultiInputTestEnv((3,)), nb_steps=10)
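# The L head's output size, (nb_actions**2 + nb_actions) // 2, is the number of entries in an
# nb_actions x nb_actions lower-triangular matrix: NAF assembles L from these values and uses
# P = L L^T in the quadratic advantage term A(s, a) = -0.5 * (a - mu)^T P (a - mu).
# A minimal standalone check of the count (plain NumPy, not keras-rl code):
def tril_entry_count(n):
    # Entries on and below the diagonal of an n x n matrix.
    return (n * n + n) // 2

assert tril_entry_count(2) == 3  # the Dense layer above outputs 3 units for nb_actions = 2
assert tril_entry_count(2) == len(np.tril_indices(2)[0])  # cross-check with NumPy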
def test_cdqn():
    # TODO: replace this with a simpler environment where we can actually test if it finds a solution
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    # V(s): scalar state-value head.
    V_model = Sequential()
    V_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    V_model.add(Dense(16))
    V_model.add(Activation('relu'))
    V_model.add(Dense(1))

    # mu(s): greedy action head.
    mu_model = Sequential()
    mu_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    mu_model.add(Dense(16))
    mu_model.add(Activation('relu'))
    mu_model.add(Dense(nb_actions))

    # L(s, a): lower-triangular entries of the advantage term.
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    x = concatenate([action_input, Flatten()(observation_input)])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)
    L_model = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                     memory=memory, nb_steps_warmup=50, random_process=random_process,
                     gamma=.99, target_model_update=1e-3)
    agent.compile(Adam(lr=1e-3))

    agent.fit(env, nb_steps=400, visualize=False, verbose=0, nb_max_episode_steps=100)
    h = agent.test(env, nb_episodes=2, visualize=False, nb_max_episode_steps=100)
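# A target_model_update below 1 selects soft target updates in keras-rl: each training step the
# target network's weights are nudged toward the online network's weights, which stabilises the
# bootstrapped Q targets. A rough sketch of the update rule (an illustration of the idea, not
# keras-rl's actual implementation):
def soft_update(target_weights, online_weights, tau=1e-3):
    # target <- tau * online + (1 - tau) * target, applied per weight array.
    return [tau * w + (1. - tau) * t for w, t in zip(online_weights, target_weights)]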
def test_multi_continuous_dqn_input():
    nb_actions = 2

    # V(s) over two observation components.
    V_input1 = Input(shape=(2, 3))
    V_input2 = Input(shape=(2, 4))
    x = concatenate([V_input1, V_input2])
    x = Flatten()(x)
    x = Dense(1)(x)
    V_model = Model(inputs=[V_input1, V_input2], outputs=x)

    # mu(s) over the same two components.
    mu_input1 = Input(shape=(2, 3))
    mu_input2 = Input(shape=(2, 4))
    x = concatenate([mu_input1, mu_input2])
    x = Flatten()(x)
    x = Dense(nb_actions)(x)
    mu_model = Model(inputs=[mu_input1, mu_input2], outputs=x)

    # L(s, a): lower-triangular entries of the advantage term.
    L_input1 = Input(shape=(2, 3))
    L_input2 = Input(shape=(2, 4))
    L_input_action = Input(shape=(nb_actions,))
    x = concatenate([L_input1, L_input2])
    x = concatenate([Flatten()(x), L_input_action])
    x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)
    L_model = Model(inputs=[L_input_action, L_input1, L_input2], outputs=x)

    memory = SequentialMemory(limit=10, window_length=2)
    processor = MultiInputProcessor(nb_inputs=2)
    agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
                     memory=memory, nb_steps_warmup=5, batch_size=4, processor=processor)
    agent.compile('sgd')
    agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
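# MultiInputProcessor regroups tuple observations so that each input of a multi-input Keras
# model receives its own batch array. A minimal sketch of that regrouping (an assumed layout
# for illustration, not keras-rl's exact code):
def split_multi_input_batch(state_batch, nb_inputs):
    # state_batch: batch of samples, each a window of steps, each step a tuple of nb_inputs
    # observation components; returns one (batch, window, ...) array per component.
    return [np.array([[step[i] for step in sample] for sample in state_batch])
            for i in range(nb_inputs)]

# Batch of 4 samples, window of 2, components of shape (3,) and (4,) as in the test above.
_batch = [[(np.zeros(3), np.zeros(4))] * 2 for _ in range(4)]
assert [a.shape for a in split_multi_input_batch(_batch, 2)] == [(4, 2, 3), (4, 2, 4)]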
def test_ddpg():
    # TODO: replace this with a simpler environment where we can actually test if it finds a solution
    env = gym.make('Pendulum-v0')
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.shape[0]

    # Actor: maps observations to actions.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    # Critic: maps (action, observation) pairs to a scalar Q-value.
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = concatenate([action_input, flattened_observation])
    x = Dense(16)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=1000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=50, nb_steps_warmup_actor=50,
                      random_process=random_process, gamma=.99, target_model_update=1e-3)
    agent.compile([Adam(lr=1e-3), Adam(lr=1e-3)])

    agent.fit(env, nb_steps=400, visualize=False, verbose=0, nb_max_episode_steps=100)
    h = agent.test(env, nb_episodes=2, visualize=False, nb_max_episode_steps=100)
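# Both agents explore by adding temporally correlated Ornstein-Uhlenbeck noise to the actions:
# dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1), which drifts smoothly and reverts to
# the mean rather than jumping independently each step. A minimal standalone sketch of the
# process (not keras-rl's implementation):
def ou_noise(n_steps, theta=.15, mu=0., sigma=.3, dt=1., size=1):
    # Euler-Maruyama discretisation of the Ornstein-Uhlenbeck process.
    x = np.zeros(size)
    samples = []
    for _ in range(n_steps):
        x = x + theta * (mu - x) * dt + sigma * np.sqrt(dt) * np.random.normal(size=size)
        samples.append(x.copy())
    return np.array(samples)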
# mu(s): greedy action head.
mu_model = Sequential()
mu_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
mu_model.add(Dense(16))
mu_model.add(Activation('relu'))
mu_model.add(Dense(16))
mu_model.add(Activation('relu'))
mu_model.add(Dense(16))
mu_model.add(Activation('relu'))
mu_model.add(Dense(nb_actions))
mu_model.add(Activation('linear'))
mu_model.summary()

# L(s, a): lower-triangular entries of the advantage term.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
x = concatenate([action_input, Flatten()(observation_input)])
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense((nb_actions * nb_actions + nb_actions) // 2)(x)  # //: Dense needs an int, not a float
x = Activation('linear')(x)
L_model = Model(inputs=[action_input, observation_input], outputs=x)
L_model.summary()

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
processor = PendulumProcessor()
memory = SequentialMemory(limit=100000, window_length=1)
# Actor: maps observations to actions in [-1, 1] via tanh.
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
actor.add(Dense(400))
actor.add(Activation('relu'))
actor.add(Dense(300))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('tanh'))
actor.summary()

# Critic: the action enters after the first hidden layer, as in the original DDPG paper.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
flattened_observation = Flatten()(observation_input)
x = Dense(400)(flattened_observation)
x = Activation('relu')(x)
x = concatenate([x, action_input])
x = Dense(300)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
critic.summary()

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions,