def on_episode_begin(self, episode, qfunction):
    """Install a newly drawn game environment before an episode starts.

    Builds a new ``Environment`` around a ``FixedGameMDP`` created from
    ``get_random_game()`` and a ``RandPlayer`` seeded with the module-level
    ``seed``, then points the module-level ``qlearning`` learner and
    ``egreedy`` policy at it. Every 50th episode a progress line is printed.

    Args:
        episode: Episode counter; only used to decide when to print.
        qfunction: Not used by this callback.
    """
    # The game is drawn before the opponent is constructed. The trailing
    # positional ``1`` is passed through to FixedGameMDP unchanged; its
    # meaning is not visible here (presumably a player index -- TODO confirm).
    fresh_env = Environment(
        FixedGameMDP(
            get_random_game(),
            RandPlayer(random_state=seed),
            1,
        )
    )

    # Re-target the learner and the policy at the new environment.
    qlearning.env = fresh_env
    egreedy.action_space = fresh_env.actions
    # NOTE(review): qlearning.policy is presumably the same object as
    # ``egreedy``; both attributes are set here -- confirm which one the
    # policy actually reads.
    qlearning.policy.provider = fresh_env.actions

    # Only report progress on every 50th episode.
    if episode % 50 != 0:
        return
    progress = 'Episode {}'.format(episode)
    print(progress)
from capstone.datasets.ucic4 import (
    get_random_game,
    get_random_loss_game,
)
from capstone.game.games import Connect4 as C4
from capstone.game.players import RandPlayer
from capstone.rl import (
    Environment,
    GameMDP,
    FixedGameMDP,
)
from capstone.rl.learners import ApproximateQLearning as ApproxQLearning
from capstone.rl.policies import (
    EGreedy,
    RandomPolicy,
)
from capstone.rl.utils import (
    EpisodicWLDPlotter,
    Callback,
    LinearAnnealing,
)
from capstone.rl.value_functions.c4deepnetwork import Connect4DeepNetwork
import numpy as np
import random

# Single seed shared by Python's RNG, NumPy's RNG, the random opponent and
# the epsilon-greedy policy. It is also read by ``on_episode_begin``.
seed = 383
random.seed(seed)
np.random.seed(seed)

# Initial environment: a game drawn by get_random_game() played against a
# seeded RandPlayer. The game is drawn before the opponent is constructed.
mdp = FixedGameMDP(
    get_random_game(),
    RandPlayer(random_state=seed),
    1,
)
env = Environment(mdp)

# One network instance is handed to both the policy and the learner below.
c4dn = Connect4DeepNetwork()

# Epsilon-greedy policy over the current environment's actions.
# ``on_episode_begin`` reassigns ``egreedy.action_space`` each episode.
egreedy = EGreedy(
    action_space=env.actions,
    qfunction=c4dn,
    epsilon=1.0,
    selfplay=False,
    random_state=seed,
)

# Approximate Q-learning with experience replay enabled.
# ``on_episode_begin`` reassigns ``qlearning.env`` each episode.
qlearning = ApproxQLearning(
    env=env,
    qfunction=c4dn,
    policy=egreedy,
    discount_factor=0.99,
    selfplay=False,
    experience_replay=True,
    replay_memory_size=20000,
    batch_size=32,
)