def test_keras_pipeline():
    """Smoke-test the learnrl training pipeline with a custom Keras estimator.

    Defines a minimal ``KerasEstimator`` subclass, plugs it into a
    ``StandardAgent`` as the action-value estimator, and runs a single
    training episode on ``CatchEnv`` through ``rl.Playground``.
    """

    class MyEstimator(KerasEstimator):
        def build(self):
            # Two-layer MLP: a ReLU hidden layer sized to the observation,
            # then a linear output over the action space, trained with MSE.
            self.model = tf.keras.models.Sequential([
                tf.keras.layers.Dense(
                    self.observation_size,
                    activation='relu',
                    input_shape=self.observation_shape,
                ),
                tf.keras.layers.Dense(self.action_size),
            ])
            self.model.compile(
                tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
                loss='mse',
            )

        def preprocess(self, observations, actions):
            # Observations are fed to the network unchanged; actions are
            # not used by this estimator's preprocessing.
            return observations

    env = CatchEnv()
    custom_action_value = MyEstimator(
        observation_space=env.observation_space,
        action_space=env.action_space,
        batch_size=64,
        freezed_steps=20,
    )
    agent = StandardAgent(
        observation_space=env.observation_space,
        action_space=env.action_space,
        action_values=custom_action_value,
    )
    playground = rl.Playground(env, agent)
    playground.fit(1)
import learnrl as rl
import tensorflow as tf
import retro

from agents.a2c import ActorCriticAgent


def main():
    """Train and then evaluate an actor-critic agent on Airstriker-Genesis.

    Builds the gym-retro environment, runs 100 training episodes with
    rendering enabled, then one test episode.
    """
    env = retro.make(game='Airstriker-Genesis')
    agent = ActorCriticAgent(env.action_space)
    print(env.observation_space, env.action_space)

    # NOTE(review): the original bound the RewardHandler *class* (no call
    # parentheses) and never passed it to the playground, so it had no
    # effect. Kept as-is to preserve behavior; if custom reward shaping is
    # intended, instantiate it and wire it into pg.fit — TODO confirm the
    # intended learnrl RewardHandler usage.
    reward_handler = rl.RewardHandler

    pg = rl.Playground(env, agent)
    pg.fit(
        100,
        render=True,
        verbose=2,
        titles_on_top=False,
    )
    pg.test(1, titles_on_top=False)


# Guard so importing this module does not launch a training run.
if __name__ == "__main__":
    main()