Example #1
    def __init__(self):
        self.env = game_environment.ConnectXEnvironment(7, 6, 4)

        self.trainer = 0  # placeholder; replaced by change_trainer_at_random()

        self.columns = self.env.num_columns
        self.rows = self.env.num_rows
        self.actions = gym.spaces.Discrete(self.columns)
        self.positions = gym.spaces.Discrete(self.columns * self.rows)
        self.list_of_trainers = ["new2", "new", "new1", "new3", "new4"]
        self.score_list = {i: 0 for i in self.list_of_trainers}
        self.games_list = {i: 0 for i in self.list_of_trainers}
        self.change_trainer_at_random()
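
change_trainer_at_random is called above but not shown in this example. A minimal sketch of what it might do, assuming each trainer name maps to saved weights and an evaluation-only agent class like the one in Example #5 (the class name Opponent and its load_weights method are assumptions, not the project's confirmed API):

import random

    def change_trainer_at_random(self):
        # Hypothetical: pick an opponent name uniformly at random and
        # load its saved weights; Opponent and load_weights are assumed.
        name = random.choice(self.list_of_trainers)
        self.trainer = Opponent(self.positions.n, self.actions.n)
        self.trainer.load_weights(name)
        self.trainer.name = name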
Example #2
    def __init__(self):
        self.env = game_environment.ConnectXEnvironment(7, 6, 4)

        self.trainer = 0

        self.columns = self.env.num_columns
        self.rows = self.env.num_rows
        self.actions = gym.spaces.Discrete(self.columns)
        self.positions = gym.spaces.Discrete(self.columns * self.rows)
        self.list_of_trainers = [
            "variety14.0", "variety5.0", "variety3.0", "variety4.0",
            "variety8.0", "variety9.0", "variety19.0",
            "lookahead_vs_verticalbot1", "variety20.0", "variety12.0",
            "variety13.0"
        ]
        self.score_list = {i: 0 for i in self.list_of_trainers}
        self.games_list = {i: 0 for i in self.list_of_trainers}
        self.change_trainer_at_random()
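
Example #4's dojo loop calls print_scores and reset_scores on this class, neither of which is part of the snippet. A plausible sketch, based purely on the score_list and games_list dicts initialised above:

    def print_scores(self):
        # Sketch: report the totals accumulated per opponent by dojo().
        for name in self.list_of_trainers:
            games = self.games_list[name]
            avg = self.score_list[name] / games if games else 0.0
            print(name, "score:", self.score_list[name],
                  "games:", games, "avg:", round(avg, 2))

    def reset_scores(self):
        # Sketch: zero the bookkeeping between reporting intervals.
        self.score_list = {i: 0 for i in self.list_of_trainers}
        self.games_list = {i: 0 for i in self.list_of_trainers}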
Example #3
    def __init__(self, num_states, num_actions, gamma, max_exp, min_exp,
                 batch_size, learning_rate):
        self.num_actions = num_actions
        self.batch_size = batch_size
        self.gamma = gamma
        self.model = network.ConnectXNetwork2(num_states, num_actions)
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.criterion = nn.MSELoss()
        self.mark = 1  # placeholder for verticalbot functionality
        self.name = 0
        self.EVALenv = game_environment.ConnectXEnvironment(7, 6, 4)

        self.experience = {
            'prev_obs': [],
            'a': [],
            'r': [],
            'obs': [],
            'done': []
        }

        self.max_exp = max_exp
        self.min_exp = min_exp
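
The experience dict above is a replay buffer whose size is bounded by max_exp, and min_exp is the usual warm-up threshold before training starts. A sketch of the add/gate pair such a buffer implies (both method names are assumptions, not the project's confirmed API):

    def add_experience(self, exp):
        # Sketch: drop the oldest transition once the buffer is full,
        # then append the new one; exp shares the keys of self.experience.
        if len(self.experience['prev_obs']) >= self.max_exp:
            for key in self.experience:
                self.experience[key].pop(0)
        for key, value in exp.items():
            self.experience[key].append(value)

    def can_train(self):
        # Sketch: only train once at least min_exp transitions exist.
        return len(self.experience['prev_obs']) >= self.min_exp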
Example #4
def dojo(games, gym, TrainNet, TargetNet, min_epsilon, epsilon, copy_step):
    # Note: 'gym' here is a ConnectXGym2 instance, not the gym library.
    total_loss = 0
    even_match = 0
    test_match = game_environment.ConnectXEnvironment(7, 6, 4)
    _, test_state = test_match.reset()
    print(TrainNet.predict(test_state))
    decay = 0.9995
    for i in range(games):
        rewards, loss = gym.generate_data(TrainNet, TargetNet, epsilon,
                                          copy_step)
        if rewards == 0:
            even_match += 1
        print("motstander", gym.trainer.mark)
        print("SCORE:", rewards)
        gym.score_list[gym.trainer.name] += rewards
        gym.games_list[gym.trainer.name] += 1
        total_loss += loss
        print(i)
        if i % 10 == 0 and i != 0:
            gym.change_trainer_at_random()
            print(TrainNet.predict(test_state))
        if i % 2 == 0 and i != 0:
            epsilon = max(min_epsilon, epsilon * decay)
        if i % 100 == 0 and i != 0:
            print('Total Loss:', total_loss)
            print('Even matches:', even_match)
            gym.print_scores()
            gym.reset_scores()
            even_match = 0
            total_loss = 0
            print("games", i)
            print("epsilon", epsilon)
        if i % copy_step == 0:
            TargetNet.copy_weights(TrainNet)
        if i % 50000 == 0 and i != 0:
            plt = plot_grad_flow(TrainNet.model.named_parameters())
            path = "plot" + str(i) + ".png"
            plt.savefig(path)
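
A hypothetical way to wire dojo up, reusing the hyperparameters defined in the script at the bottom of this page (gym2 is named to avoid shadowing the gym module the other examples import):

gym2 = DQN2.ConnectXGym2()
TrainNet = DQN2.DQN(gym2.positions.n, gym2.actions.n, gamma,
                    max_exp, min_exp, batch_size, learning_rate)
TargetNet = DQN2.DQN(gym2.positions.n, gym2.actions.n, gamma,
                     max_exp, min_exp, batch_size, learning_rate)
dojo(episodes, gym2, TrainNet, TargetNet, min_epsilon, epsilon, copy_step)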
Example #5
    def __init__(self, num_states, num_actions):
        self.num_actions = num_actions
        self.model = network.ConnectXNetwork2(num_states, num_actions)
        self.mark = 6
        self.name = 0
        self.EVALenv = game_environment.ConnectXEnvironment(7, 6, 4)
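
This evaluation-only agent lacks the predict method that dojo calls in Example #4. A minimal sketch, assuming ConnectXNetwork2 accepts a flat board state and returns per-column Q-values (the input format is an assumption):

import numpy as np
import torch

    def predict(self, inputs):
        # Sketch: forward pass without tracking gradients.
        with torch.no_grad():
            state = torch.from_numpy(np.atleast_2d(inputs)).float()
            return self.model(state)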
Example #6
            if self.game.board[space] == 1:
                pygame.draw.circle(
                    self.screen, self.red,  # player-1 colour attribute is assumed
                    [self.side_length + w * 100 + 50, h * 100 + 100], 45)
            if self.game.board[space] == 2:
                pygame.draw.circle(
                    self.screen, self.blue,
                    [self.side_length + w * 100 + 50, h * 100 + 100], 45)
            w += 1
            if w == self.width:
                w = 0
                h += 1
        pygame.event.pump()
        pygame.display.flip()


gamma = 0.99
copy_step = 25
max_exp = 100000
min_exp = 100
batch_size = 32
learning_rate = 0.00146
epsilon = 0.05
decay = 0.999
min_epsilon = 0.01
episodes = 200000

template_gym = DQN2.ConnectXGym2()  # used only to read state/action space sizes
Opponent = DQN2.DQN(template_gym.positions.n, template_gym.actions.n, gamma,
                    max_exp, min_exp, batch_size, learning_rate)
Opponent.load_weights('lookahead_vs_verticalbot2')
game = game_environment.ConnectXEnvironment(7, 6, 4)
GameRendering(game, Opponent, 0)
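
load_weights is called on the DQN above but not shown in these examples. A minimal sketch, assuming the weights were saved with torch.save(self.model.state_dict(), path):

import torch

    def load_weights(self, path):
        # Sketch: restore parameters saved as a state_dict at 'path'.
        self.model.load_state_dict(torch.load(path))
        self.model.eval()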