def __init__(self):
    """Build the ConnectX self-play gym: a 7x6 board (connect 4), Gym-style
    action/observation spaces, and a pool of saved opponents ("trainers")
    with per-opponent score/game counters. Picks an initial opponent at random.
    """
    self.env = game_environment.ConnectXEnvironment(7, 6, 4)
    self.trainer = 0  # current opponent; replaced by change_trainer_at_random()
    self.columns = self.env.num_columns
    self.rows = self.env.num_rows
    # One discrete action per column; one observation slot per board cell.
    self.actions = gym.spaces.Discrete(self.columns)
    self.positions = gym.spaces.Discrete(self.columns * self.rows)
    # Saved-model names that can be loaded as sparring partners.
    self.list_of_trainers = ["new2", "new", "new1", "new3", "new4"]
    self.score_list = {name: 0 for name in self.list_of_trainers}
    self.games_list = {name: 0 for name in self.list_of_trainers}
    self.change_trainer_at_random()
def __init__(self):
    """Build the second-generation ConnectX gym (7x6 board, connect 4).

    Identical structure to the first gym, but sparring against a pool of
    "variety" checkpoints plus one lookahead-trained model. Tracks cumulative
    score and game count per opponent, then selects a starting opponent.
    """
    self.env = game_environment.ConnectXEnvironment(7, 6, 4)
    self.trainer = 0  # placeholder until change_trainer_at_random() assigns one
    self.columns = self.env.num_columns
    self.rows = self.env.num_rows
    self.actions = gym.spaces.Discrete(self.columns)
    self.positions = gym.spaces.Discrete(self.columns * self.rows)
    # Checkpoint names of the opponent pool.
    self.list_of_trainers = [
        "variety14.0",
        "variety5.0",
        "variety3.0",
        "variety4.0",
        "variety8.0",
        "variety9.0",
        "variety19.0",
        "lookahead_vs_verticalbot1",
        "variety20.0",
        "variety12.0",
        "variety13.0",
    ]
    self.score_list = {name: 0 for name in self.list_of_trainers}
    self.games_list = {name: 0 for name in self.list_of_trainers}
    self.change_trainer_at_random()
def __init__(self, num_states, num_actions, gamma, max_exp, min_exp, batch_size, learning_rate):
    """DQN agent: network, Adam optimizer, MSE loss, and an experience buffer.

    Args:
        num_states: size of the flattened board observation.
        num_actions: number of playable columns.
        gamma: discount factor for future rewards.
        max_exp / min_exp: upper/lower bounds on replay-buffer size.
        batch_size: minibatch size sampled from the buffer.
        learning_rate: Adam learning rate.
    """
    self.num_actions = num_actions
    self.batch_size = batch_size
    self.gamma = gamma
    self.model = network.ConnectXNetwork2(num_states, num_actions)
    self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
    self.criterion = nn.MSELoss()
    self.mark = 1  # placeholder for verticalbot functionality
    self.name = 0
    self.EVALenv = game_environment.ConnectXEnvironment(7, 6, 4)
    # Replay buffer: parallel lists keyed by transition component.
    self.experience = {'prev_obs': [], 'a': [], 'r': [], 'obs': [], 'done': []}
    self.max_exp = max_exp
    self.min_exp = min_exp
def dojo(games, gym, TrainNet, TargetNet, min_epsilon, epsilon, copy_step):
    """Run a self-play training loop for `games` episodes.

    Per episode: generate data against the gym's current opponent, track
    score/loss, decay epsilon, periodically rotate opponents, sync the
    target network every `copy_step` games, and occasionally dump a
    gradient-flow plot.

    NOTE(review): this body was re-indented from a whitespace-mangled
    source; the grouping of statements under the periodic `if` checks is
    inferred — confirm against the original file.
    """
    total_loss = 0
    even_match = 0
    # Fixed probe position: printing its Q-values over time shows learning drift.
    test_match = game_environment.ConnectXEnvironment(7, 6, 4)
    _, test_state = test_match.reset()
    print(TrainNet.predict(test_state))
    decay = 0.9995
    for i in range(games):
        rewards, loss = gym.generate_data(TrainNet, TargetNet, epsilon, copy_step)
        if rewards == 0:
            even_match += 1  # draw
        print("motstander", gym.trainer.mark)
        print("SCORE:", rewards)
        gym.score_list[gym.trainer.name] += rewards
        gym.games_list[gym.trainer.name] += 1
        total_loss += loss
        print(i)
        if i % 10 == 0 and i != 0:
            # Rotate sparring partner and re-probe the fixed test position.
            gym.change_trainer_at_random()
            print(TrainNet.predict(test_state))
        if i % 2 == 0 and i != 0:
            # Exploration anneal, floored at min_epsilon.
            epsilon = max(min_epsilon, epsilon * decay)
        if i % 100 == 0 and i != 0:
            # Periodic progress report, then reset the rolling counters.
            print('Total Loss:', total_loss)
            print('Even matches:', even_match)
            gym.print_scores()
            gym.reset_scores()
            even_match = 0
            total_loss = 0
            print("games", i)
            print("epsilon", epsilon)
        if i % copy_step == 0:
            TargetNet.copy_weights(TrainNet)
        if i % 50000 == 0 and i != 0:
            # Snapshot gradient magnitudes for debugging vanishing gradients.
            plt = plot_grad_flow(TrainNet.model.named_parameters())
            path = "plot" + str(i) + ".png"
            plt.savefig(path)
def __init__(self, num_states, num_actions):
    """Lightweight (frozen) opponent wrapper: network only, no optimizer.

    Args:
        num_states: size of the flattened board observation.
        num_actions: number of playable columns.
    """
    self.num_actions = num_actions
    self.model = network.ConnectXNetwork2(num_states, num_actions)
    self.mark = 6  # placeholder mark value — TODO confirm why 6 is used here
    self.name = 0
    self.EVALenv = game_environment.ConnectXEnvironment(7, 6, 4)
# NOTE(review): whitespace-mangled paste — this single line fuses (a) the TAIL of a
# GameRendering draw loop (its method signature and the opening pygame.draw.circle
# call lie before this chunk, so it cannot be safely reconstructed here) with
# (b) top-level script code: hyperparameters, building DQN2.ConnectXGym2, loading
# the 'lookahead_vs_verticalbot2' checkpoint, and launching GameRendering.
# Needs manual re-indentation against the original file.
[self.side_length + w * 100 + 50, h * 100 + 100], 45) if self.game.board[space] == 2: pygame.draw.circle( self.screen, self.blue, [self.side_length + w * 100 + 50, h * 100 + 100], 45) w += 1 if w == self.width: w = 0 h += 1 pygame.event.pump() pygame.display.flip() gamma = 0.99 copy_step = 25 max_exp = 100000 min_exp = 100 batch_size = 32 learning_rate = 0.00146 epsilon = 0.05 decay = 0.999 min_epsilon = 0.01 episodes = 200000 template_gym = DQN2.ConnectXGym2() Opponent = DQN2.DQN(template_gym.positions.n, template_gym.actions.n, gamma, max_exp, min_exp, batch_size, learning_rate) Opponent.load_weights('lookahead_vs_verticalbot2') game = game_environment.ConnectXEnvironment(7, 6, 4) GameRendering(game, Opponent, 0)