def rollout(self, gamestate, to_play):
    """Finish the game with random moves and score it for ``to_play``.

    Moves come from ``connectFour.random_valid`` and are applied with
    ``connectFour.play`` until ``connectFour.checkWinner`` stops returning 2
    (the value this file treats as "game still running").  The return value
    of ``play`` is ignored, so the loop relies on ``play`` updating
    ``gamestate`` in place; callers should expect the board they pass in to
    be finished afterwards.

    Args:
        gamestate: board to play out.
        to_play: player (1 or -1 elsewhere in this file) whose turn it is
            on entry.

    Returns:
        ``connectFour.checkWinner(gamestate)`` for the finished board,
        multiplied by the ``to_play`` value passed in.
    """
    # Keep the caller's perspective separate from the alternating mover.
    # Previously the flipped value was used for the final product, so any
    # playout that made at least one move was scored against the player
    # *after* the last mover rather than the player the caller asked about.
    perspective = to_play
    mover = to_play
    while connectFour.checkWinner(gamestate) == 2:
        connectFour.play(gamestate, mover,
                         connectFour.random_valid(gamestate))
        mover *= -1
    return connectFour.checkWinner(gamestate) * perspective
def compete(net1, net2, print_board=False):
    """Play one game between two networks and return the result.

    ``net1`` moves as player 1 and ``net2`` as player -1, alternating from an
    empty 6x7 board.  Each move is chosen by ``minimax.pickMove`` with a
    third argument of 2 (presumably the search depth -- confirm against
    ``minimax``).

    Args:
        net1: network passed to ``minimax.pickMove`` for player 1.
        net2: network passed to ``minimax.pickMove`` for player -1.
        print_board: when true, print the board after every move.

    Returns:
        ``connectFour.checkWinner(board)`` for the final board.
    """
    board = np.zeros((6, 7))
    seats = ((1, net1), (-1, net2))
    turn = 0
    # 2 is the checkWinner value this file treats as "game still running".
    while connectFour.checkWinner(board) == 2:
        player, net = seats[turn % 2]
        chosen = minimax.pickMove(board, player, 2, net)
        connectFour.play(board, player, chosen)
        if print_board:
            connectFour.print_board(board)
        turn += 1
    return connectFour.checkWinner(board)
def self_play(self, depth):
    """Play one game against itself and build training examples from it.

    For every position of the game, ``select`` is run ``depth`` times from a
    copy of the current board, then the move with the highest visit count
    recorded for that position is played.

    Args:
        depth: number of ``select`` calls made before each move.

    Returns:
        A list of ``(input, target)`` tuples, one for each stored position
        whose visit counts sum to more than 15.  ``input`` is the stored
        board key wrapped in an extra leading axis; ``target`` is the visit
        counts divided by their sum, with ``sum(q values) / sum(visits)``
        appended.
    """
    min_total_visits = 15  # positions visited less than this are dropped

    board = np.zeros((6, 7))
    to_play = 1
    while connectFour.checkWinner(board) == 2:
        # The board only changes at the bottom of the loop, so one key is
        # valid for the whole iteration.
        root_key = tuple(map(tuple, board))
        if root_key not in self.__gamestates:
            result = self.neural_net.feedforward(board)
            v_prime = result[:-1]
            # Rows: q values, visit counts, network priors, player to move.
            self.__gamestates[root_key] = np.array(
                [[0.0] * 7, [0.0] * 7, v_prime, to_play])

        # Grow the tree from this position; select() works on a copy so
        # the real board is untouched.
        for _ in range(depth):
            self.select(copy.copy(board))

        visit_counts = self.__gamestates[root_key][1]
        connectFour.play(board, to_play,
                         visit_counts.index(max(visit_counts)))
        to_play *= -1

    training_set = []
    # items() instead of viewitems(): works on both Python 2 and 3.
    for key, value in self.__gamestates.items():
        visit_total = np.sum(value[1])
        if visit_total > min_total_visits:
            training_set.append(
                (np.array([np.array(key)]),
                 np.append(value[1] / visit_total,
                           np.sum(value[0]) / visit_total)))
    return training_set
def select(self, gamestate):
    """Run one tree-search step from ``gamestate`` and return its value.

    Picks the valid move with the highest upper-confidence score, plays it,
    and then either recurses (the resulting position is already stored) or
    stores the new position and finishes the game with a random rollout.

    ``gamestate`` is modified: the chosen move is applied with
    ``connectFour.play`` and the rollout keeps playing on the same object,
    so callers should pass a copy (``self_play`` does).

    Args:
        gamestate: board to search from; must already have an entry in the
            stored game states unless it is terminal.

    Returns:
        The negated ``checkWinner`` value for a terminal board; otherwise
        the negated value obtained from the child position.
    """
    # Terminal node: nothing to expand.
    # NOTE(review): this returns the raw winner negated, not scaled by the
    # player to move -- confirm that is the intended sign convention.
    win = connectFour.checkWinner(gamestate)
    if win != 2:
        return -1 * win

    tupled_gamestate = tuple(map(tuple, gamestate))
    # Rows: [0] q values, [1] visit counts, [2] priors, [3] player to move.
    stats = self.__gamestates[tupled_gamestate]

    # Shuffle the valid moves so ties are broken randomly.
    valid_moves = [col for col in range(7)
                   if connectFour.check_valid(gamestate, col)]
    np.random.shuffle(valid_moves)

    # Pick the move with the highest upper confidence bound.
    total_visits = np.sum(stats[1])
    move = -1
    max_ucb = -1 * float("inf")
    for candidate in valid_moves:
        ucb = stats[0][candidate] * (
            1 - self.exploration_factor
        ) + self.exploration_factor * stats[2][candidate] * math.sqrt(
            total_visits) / (1 + stats[1][candidate])
        if max_ucb <= ucb:
            # Track the running maximum; without this every candidate
            # passes the test and the last shuffled move always wins.
            max_ucb = ucb
            move = candidate

    # Generate the new state.
    new_state = connectFour.play(gamestate, stats[3], move)
    tupled_new_state = tuple(map(tuple, new_state))

    if tupled_new_state in self.__gamestates:
        # Known position: recurse, then fold the result into the running
        # mean q value and bump the visit count for the chosen move.
        # NOTE(review): the child's value is averaged in un-negated --
        # confirm against the intended perspective convention.
        win = self.select(new_state)
        node = self.__gamestates[tupled_gamestate]
        node[0][move] = (node[0][move] * node[1][move] + win) / (
            node[1][move] + 1)
        node[1][move] += 1
        return -1 * win

    # Leaf: store the new position, then estimate it with a random playout.
    result = self.neural_net.feedforward(gamestate)
    v_prime = result[:-1]
    next_player = -1 * stats[3]
    self.__gamestates[tupled_new_state] = np.array(
        [[0.0] * 7, [0.0] * 7, v_prime, next_player])

    # One rollout finishes the game on ``gamestate`` itself; score the
    # finished board for ``next_player`` directly.  (The code used to call
    # rollout twice and discard the first result; the second call only
    # ever saw the already-finished board, which is what is computed here.)
    self.rollout(gamestate, next_player)
    return -1 * (connectFour.checkWinner(gamestate) * next_player)
def alphabeta(node, depth, alpha, beta, player, currPlayer, network):
    """Alpha-beta search returning the value of ``node`` for ``player``.

    Args:
        node: board to evaluate; moves are applied with ``connectFour.play``
            and reverted with ``connectFour.unplay`` during the search.
        depth: remaining plies to search.
        alpha: best value found so far for the maximizing side.
        beta: best value found so far for the minimizing side.
        player: the side the value is computed for.
        currPlayer: the side to move at ``node``.
        network: evaluator; ``network.feed_forward(node)`` is used when the
            depth limit is reached.

    Returns:
        For a finished game, ``winner * (1234567 + depth) * player``.  At
        the depth limit, element 0 of the network output when ``player`` is
        1, otherwise element 1.  Otherwise the max (when ``player`` is to
        move) or min (when the opponent is to move) over the child values.
    """
    outcome = connectFour.checkWinner(node)
    if outcome != 2:
        return outcome * (1234567 + depth) * player

    if depth <= 0:
        evaluation = network.feed_forward(node)
        return evaluation[0] if player == 1 else evaluation[1]

    # Random move order so equally valued moves are not always tried in
    # the same sequence.
    columns = [col for col in range(7) if connectFour.check_valid(node, col)]
    random.shuffle(columns)

    maximizing = player == currPlayer
    limit = 1234567 - 50
    best = -1 * limit if maximizing else 1 * limit

    for col in columns:
        child_value = alphabeta(connectFour.play(node, currPlayer, col),
                                depth - 1, alpha, beta, player,
                                -1 * currPlayer, network)
        # Revert the move before looking at the next column.
        connectFour.unplay(node, col)
        if maximizing:
            best = max(best, child_value)
            alpha = max(alpha, best)
        else:
            best = min(best, child_value)
            beta = min(beta, best)
        if beta <= alpha:
            break  # cutoff: the remaining columns cannot change the result
    return best
training_set = tree.self_play(30) # print training_set # print np.shape(training_set) # print training_set[0][1] cnn.stochastic_gradient_descent(epochs=10, step_size=0.03, mini_batch_size=len(training_set) / 10, training_set=training_set, is_momentum_based=False, friction=0.9) if hyperepoch % 5 == 1: board = np.zeros((6, 7)) while (connectFour.checkWinner(board) == 2): #connectFour.play(board, 1, minimax.pickMove(board, 1, 3, net0)) results = cnn.feedforward(np.array([board]))[:-1] connectFour.play(board, 1, np.where(results == max(results))) connectFour.print_board(board) # raw_input("press") print if not connectFour.checkWinner(board) == 2: break results = cnn.feedforward(np.array([board]))[:-1] connectFour.play(board, -1, np.where(results == max(results))) connectFour.print_board(board) print("WINNER:" + str(connectFour.checkWinner(board))) while True: board = np.zeros((6, 7))