Example #1
import tensorflow as tf

import learnrl as rl
# KerasEstimator, CatchEnv, and StandardAgent ship with learnrl; the exact
# import paths below are assumptions about the library's layout.
from learnrl.estimators import KerasEstimator
from learnrl.environments import CatchEnv
from learnrl.agents import StandardAgent


def test_keras_pipeline():
    class MyEstimator(KerasEstimator):
        def build(self):
            # Two-layer MLP: a hidden layer sized to the flattened
            # observation, then one output unit per action (Q-values).
            self.model = tf.keras.models.Sequential()
            self.model.add(
                tf.keras.layers.Dense(self.observation_size,
                                      activation='relu',
                                      input_shape=self.observation_shape))
            self.model.add(tf.keras.layers.Dense(self.action_size))

            self.model.compile(
                tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
                loss='mse')

        def preprocess(self, observations, actions):
            # No preprocessing: raw observations are fed to the network.
            return observations

    env = CatchEnv()
    custom_action_value = MyEstimator(
        observation_space=env.observation_space,
        action_space=env.action_space,
        batch_size=64,
        freezed_steps=20)  # presumably how long target weights stay frozen
    agent = StandardAgent(observation_space=env.observation_space,
                          action_space=env.action_space,
                          action_values=custom_action_value)

    pg = rl.Playground(env, agent)
    pg.fit(1)  # train for a single episode
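
For context, `freezed_steps` hints at DQN-style target freezing: a frozen copy of the online network provides the bootstrap targets and is only synchronized every N training steps. The sketch below illustrates that generic trick in plain TensorFlow, independently of learnrl; the network shape, the dummy data, and the sync cadence are illustrative assumptions, not learnrl internals.

import numpy as np
import tensorflow as tf

# Online network that gets trained, plus a frozen copy used for targets.
online = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(2),
])
online.compile(tf.keras.optimizers.Adam(1e-3), loss='mse')
target = tf.keras.models.clone_model(online)
target.set_weights(online.get_weights())

FREEZED_STEPS = 20  # sync cadence, mirroring freezed_steps=20 above

for step in range(100):
    obs = np.random.rand(8, 4).astype(np.float32)  # dummy batch
    # Bootstrap values come from the frozen target network...
    target_q = target.predict(obs, verbose=0)
    # ...while only the online network is trained on them.
    online.train_on_batch(obs, target_q)
    if (step + 1) % FREEZED_STEPS == 0:
        # Periodically copy the online weights into the frozen target.
        target.set_weights(online.get_weights())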
Example #2
import learnrl as rl
import tensorflow as tf
import retro

from agents.a2c import ActorCriticAgent

# Gym Retro environment; the Airstriker-Genesis ROM ships with gym-retro.
env = retro.make(game='Airstriker-Genesis')
agent = ActorCriticAgent(env.action_space)

print(env.observation_space, env.action_space)

# The original snippet assigned the rl.RewardHandler class here without
# instantiating or using it; see the sketch after this example for how a
# reward handler could be wired in.

pg = rl.Playground(env, agent)
pg.fit(
    100,  # number of training episodes
    render=True,
    verbose=2,
    titles_on_top=False,
)
pg.test(1, titles_on_top=False)
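
Since Example #2 references rl.RewardHandler without using it, here is a minimal sketch of reward shaping with a handler subclass. The `reward(...)` hook name and signature, and passing the handler to `Playground.fit`, are assumptions about learnrl's API rather than facts from the source; check the learnrl documentation before relying on them.

import learnrl as rl

class ClippedRewardHandler(rl.RewardHandler):
    """Hypothetical handler clipping every reward into [-1, 1]."""

    # Assumed hook: the method name and argument list are guesses at the
    # interface learnrl expects a RewardHandler to implement.
    def reward(self, observation, action, reward, done, info, next_observation):
        return max(-1.0, min(1.0, reward))

# Assumed wiring (also unverified), shown commented out:
# pg.fit(100, reward_handler=ClippedRewardHandler(), render=True)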