Example #1
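Training a keras-rl DQNAgent on a small 4x4 Osero (Othello) board, using epsilon-greedy exploration and a replay buffer.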
# Project-local modules: the Othello environment, an episode logger, and the
# network definition (osero.py, logger.py, model.py).
from osero import Osero
from logger import EpisodeLogger
from model import NetworkModel
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory
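# DQNAgent, EpsGreedyQPolicy, and SequentialMemory come from keras-rl and
# provide the agent, the exploration policy, and the replay buffer used below.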

env = Osero(board_size=4)
nb_actions = env.action_space.n
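# nb_actions is the size of the environment's discrete action space; the exact
# mapping from actions to board moves is defined in osero.py (not shown here).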

model = NetworkModel.create_simple_nn(env)
model.summary()  # print the network architecture
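# model.py is not shown in this example, so the helper below is only an assumed
# sketch (illustrative layer sizes, hypothetical name) of what a "simple NN"
# builder for keras-rl's DQNAgent typically looks like: flatten the board
# observation and map it through dense layers to one linear Q-value per action.
from keras.layers import Dense, Flatten
from keras.models import Sequential

def create_simple_nn_sketch(env):
    model = Sequential()
    # keras-rl feeds observations with a leading window axis (window_length=1).
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(env.action_space.n, activation='linear'))
    return model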

memory = SequentialMemory(limit=10000, window_length=1)
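# Replay buffer holding up to 10,000 transitions; window_length=1 means each
# state is a single observation rather than a stack of recent observations.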

policy = EpsGreedyQPolicy(eps=0.1)
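# Epsilon-greedy exploration: with probability 0.1 take a random action,
# otherwise take the action with the highest predicted Q-value.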
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=100,
               target_model_update=1e-2,
               policy=policy)
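# nb_steps_warmup=100 collects experience before learning starts, and
# target_model_update=1e-2 (< 1) selects soft target-network updates with
# tau = 0.01.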
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
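# 'mae' is logged as an extra training metric. Depending on the installed Keras
# version, Adam's argument may need to be learning_rate instead of lr.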
# dqn.load_weights('results/190621/10000.h5')
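# (Uncomment the line above to resume training from previously saved weights.)
# fit() below runs the training loop for nb_steps environment steps.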
history = dqn.fit(env,
                  nb_steps=10000,