def get_next_move(self, m: mc.Mancala) -> int:
    """Return the valid move with the highest minimax score.

    The board status of ``m`` is copied into a scratch ``mc.Mancala`` so
    that trial moves never touch the caller's game.  Each valid move is
    played on the scratch game from the same starting status and scored
    with ``self.minimax(scratch, self._depth)``.

    Args:
        m: the live game whose board status is evaluated.

    Returns:
        The first move (in ``get_valid_moves()`` order) that achieves the
        highest score, or ``-1`` when there are no valid moves.
    """
    state = m.get_board_status()

    scratch = mc.Mancala()
    scratch.set_board_status(self.p, state)

    # ``None`` means "no move scored yet".  A fixed numeric floor such as
    # -100 would leave ``best_move`` at -1 whenever every score is at or
    # below that floor, even though valid moves exist.
    best_score = None
    best_move = -1

    for move in scratch.get_valid_moves():
        # Rewind to the original status before trying the next candidate.
        scratch.set_board_status(self.p, state)
        scratch.play_turn(move)
        test_score = self.minimax(scratch, self._depth)

        # Strict ">" keeps the earliest move among equally scored ones.
        if best_score is None or test_score > best_score:
            best_score = test_score
            best_move = move

        print("top level: move %i, score %i" % (move, test_score))

    return best_move
import tensorflow as tf
import mancala
import numpy as np
import random
from collections import deque
import matplotlib.pyplot as plt

# Game instance used by the rest of the script.
game = mancala.Mancala()

# Only when True is the figure below created.
retrain = False

# Fully connected network: 14 inputs, three 16-unit ReLU layers, then a
# 14-unit output layer with no activation argument.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(16, activation=tf.nn.relu, input_shape=(14, )))
model.add(tf.keras.layers.Dense(16, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(16, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(14))

learning_rate = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate)
model.compile(
    optimizer=optimizer,
    # The loss is passed by name (the string held in ``__name__``).
    loss=tf.keras.losses.mean_squared_error.__name__,
)

# Training-loop sizes.
num_episodes, num_turns_per_episode = 2000, 500

# NOTE(review): names suggest an epsilon-greedy schedule (start, floor,
# per-step decay) and a discount factor; their use is not visible here.
epsilon, epsilon_min, epsilon_decay = 1.0, 0.5, 0.995
gamma = 0.95

if retrain:
    fig = plt.figure()
import reinforcement_learning_model as avm import mancala as mancala import matplotlib.pyplot as plt if __name__ == "__main__": manc = mancala.Mancala() #create an instance of our rl agent without any discounting factor (we don't care if we win in 5 steps or 10) rl_agent = avm.ActionValueModel(epsilon = 0.5, discounting_factor = lambda a : 1) #map out valid actions for each player actions_bottom = [(1, "bottom"), (2, "bottom"), (3, "bottom"), (4, "bottom"), (5, "bottom"), (6, "bottom")] actions_top = [(7, "top"), (8, "top"), (9, "top"), (10, "top"),
# --------------------------------------------------------------------------- # EVOLUTION LOOP: for gen in range(start_gen, end_gen): # Resets agent score so old agents don't get an advantage. for agent in population: agent.score = 0 # Matches up all the agents. matchups = [(a, b) for a in population for b in population] for players in matchups: # Creates the mancala game object for each matchup. game = mncl.Mancala() # Agents take turns chosing stones. while game.is_active: # Takes the game board and changes into a column vector for the agents to process. board_input = np.array(game.board).reshape((len(game.board), 1)) # Calculates output of the neural network for the given boardstate input. output = players[game.player].choose(board_input) # Reshapes the output into a list of prioritised choices. choices = np.argsort(output.reshape((1, 6))) # Test each choice in turn until it finds a valid move. n = 0 while not game.valid_choice(choices[0][n]):
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Mon Jul 13 11:20:54 2020 @author: johanna """ import numpy as np import random import mancala as m import matplotlib.pyplot as plt ma = m.Mancala() #ma.net.generate_random_network([6,12,6,2]) #input() ma.name = 'testeinfach1' print("Start") #print(ma.net.biases) print(ma.spielfeld) ma.print_spielfeld() #ma.train_net(1,10,0.1,5) print("trained") #print(ma.play()) print('play gegen Random') ma.net.generate_random_network([14, 10, 6]) ma.name = 'testeinfach1' print("Start") ma.print_spielfeld() Spieler1 = np.array([0])
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Wed Jul 15 14:15:22 2020 @author: johanna """ import numpy as np import mancala as m ma = m.Mancala(exploration_rate=0.2) print("Start") print(ma.net.biases) print(ma.spielfeld[0:12]) #ma.train_net(500,25,1) print("trained") #print(ma.play()) print('play gegen Random') matest = m.Mancala(exploration_rate=0.8) matest.net.load_network_from_files("Test") Spieler1gewonnen = 0 Spieler2gewonnen = 0 unentschieden = 0 for i in range(1, 2): #print(i) while not ( np.array_equal(matest.spielfeld[0:6], [0, 0, 0, 0, 0, 0]) or np.array_equal(matest.spielfeld[6:12], [0, 0, 0, 0, 0, 0])
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Mon Jul 13 11:20:54 2020 @author: johanna """ import numpy as np import random import mancala as m matest = m.Mancala(exploration_rate = 0.3, name = "Test", network_layers = 4) #lasse Netz als Spieler 1 gegen Random spielen Spieler1gewonnen = 0 Spieler2gewonnen = 0 unentschieden = 0 k = 50000 for i in range (1,k): #print(i) spielfelder = [] while not(np.array_equal(matest.spielfeld[0:6] ,[0,0,0,0,0,0]) or np.array_equal(matest.spielfeld[6:12] ,[0,0,0,0,0,0]) or matest.spielfeld[12]>36 or matest.spielfeld[13]>36): #Spieler 1 netz feld = matest.get_next_action(matest.spielfeld) matest.spielfeld, reward = matest.get_spielfeld_and_reward_after_action(matest.spielfeld, feld) spielfelder.append(matest.spielfeld)
import Network as net
import mancala as man
import numpy as np
import os

# One Mancala object carrying two networks (``net`` and ``net2``); the
# exploration rate starts at 1.0.
a = man.Mancala(exploration_rate=1.0)
a.name, a.name2 = "Net", "Net2"

# Give both networks a fresh random initialisation with the same size list.
for _network in (a.net, a.net2):
    _network.generate_random_network([14, 14, 14, 14, 14, 6])

rate = []
size = 1000

_round = 0
while _round < size:
    print("Training")
    a.train_dq(100, 10, 0.2, 10)
    # Lower the exploration rate a little every round; the divisor is
    # ``size`` plus ten percent, so the rate stays above zero at the end.
    a.exploration_rate = a.exploration_rate - 1 / (size + size / 10)
    rate += [a.get_win_rate(1000)]
    # Rewrite the CSV with all win rates collected so far.
    np.savetxt("rate.csv", rate, delimiter=',')
    _round += 1
import mancala
import minimax

# Human (player 1) versus a depth-5 minimax AI (player 2).
m = mancala.Mancala("Human", "CPU")
ai = minimax.MiniMax(mancala.PLAYER2, depth=5)

while not m.is_game_over():
    try:
        print(m)
        if m.current_player() == mancala.PLAYER1:
            move = int(
                input("current player: %s" % m.get_current_player_name()))
        else:
            print("getting move from AI")
            move = ai.get_next_move(m)
            print("AI chose %i" % move)
        m.play_turn(move)
    except EOFError:
        # stdin is closed, so no further human input can arrive; retrying
        # would spin forever.  Let the error end the program.
        raise
    except Exception as e:
        # Report the problem (e.g. non-numeric input) and let the loop ask
        # again.  Python 3 exceptions have no ``.message`` attribute, so
        # print the exception object itself.
        print(e)

print(m.get_winner() + " wins!")