def alpha_beta_move(board, active_turn, depth, alpha=2):
    """Negamax search without a leaf evaluator.

    Returns a tuple (value, column) where value is 1 for a forced win,
    0 otherwise, and column is 1-based.
    """
    opponent_of = {1: -1, -1: 1}
    scratch = np.zeros((6, 7))
    scratch[:] = board[:]
    options = cccc.available_moves(board)
    random.shuffle(options)

    # Forced move: play it and report an immediate win, else a neutral score.
    if len(options) == 1:
        only = options[0]
        scratch[np.where(scratch[:, only] == 0)[0][-1], only] = active_turn
        return (1, only + 1) if cccc.winner(scratch) else (0, only + 1)

    # Horizon reached and no evaluator available: pick a random column.
    if depth == 0:
        return (0, options[np.random.randint(len(options))] + 1)

    best_value = -2
    candidate_move = None
    for col in options:
        row = np.where(scratch[:, col] == 0)[0][-1]
        scratch[row, col] = active_turn
        if cccc.winner(scratch):
            return (1, col + 1)
        opp_value, _ = alpha_beta_move(scratch, opponent_of[active_turn],
                                       depth - 1, -best_value)
        if -opp_value > best_value:
            candidate_move = col + 1
            best_value = -opp_value
        if -opp_value >= alpha:
            break  # cutoff: parent's bound already met
        scratch[row, col] = 0
    return (best_value, candidate_move)
def alpha_beta_move(board, active_turn, depth, evaluation=lambda x: 0, alpha=2):
    """Negamax search with a heuristic leaf evaluation.

    Returns (value, column) with column 1-based; value is 1 for an
    immediate win, otherwise the best evaluation reachable at `depth`.
    """
    flip = {1: -1, -1: 1}
    work = np.copy(board).reshape((6, 7))
    options = cccc.available_moves(work)
    random.shuffle(options)

    # Forced move: report win/draw score for the single legal column.
    if len(options) == 1:
        only = options[0]
        work[np.where(work[:, only] == 0)[0][-1], only] = active_turn
        if cccc.winner(work):
            return (1, only + 1)
        return (0, only + 1)

    # Leaf of the search: score each playable column with `evaluation`.
    if depth == 0:
        best_value = -2
        for col in options:
            row = np.where(work[:, col] == 0)[0][-1]
            work[row, col] = active_turn
            score = evaluation(work * active_turn)
            if score > best_value:
                best_value = score
                candidate_move = col + 1
            work[row, col] = 0
        return (best_value, candidate_move)

    best_value = -2
    candidate_move = None
    for col in options:
        row = np.where(work[:, col] == 0)[0][-1]
        work[row, col] = active_turn
        if cccc.winner(work):
            return (1, col + 1)
        opp_value, _ = alpha_beta_move(work, flip[active_turn], depth - 1,
                                       evaluation, -best_value)
        if -opp_value > best_value:
            candidate_move = col + 1
            best_value = -opp_value
        if -opp_value >= alpha:
            break  # alpha-beta cutoff
        work[row, col] = 0
    return (best_value, candidate_move)
def get_max_future(future_board, value_fun):
    """Examine every reply by the opponent (stone -1) on `future_board`.

    Each reply is scored by `cccc.winner` when it ends the game, otherwise
    by `value_fun` on a (1, 42)-reshaped copy.  Returns the minimum score
    over legal replies together with the board after that reply is played.
    """
    options = cccc.available_moves(future_board)
    move_values = np.zeros(7)
    for move in options:
        trial = np.copy(future_board)
        trial[np.where(trial[:, move] == 0)[0][-1], move] = -1
        result = cccc.winner(trial)
        if result:
            move_values[move] = result
        else:
            # Value network expects a flat (1, 42) row vector.
            move_values[move] = value_fun(np.copy(trial).reshape(1, 42))
    scores = np.array([move_values[m] for m in options])
    chosen = options[np.argmin(scores)]
    outcome = np.copy(future_board)
    outcome[np.where(outcome[:, chosen] == 0)[0][-1], chosen] = -1
    return np.amin(scores), outcome
def alpha_beta_move(board, active_turn, depth, evaluation=lambda x: 0, alpha=2):
    """Depth-limited negamax for the 6x7 board.

    Returns (value, column) where column is 1-based; value is 1 for an
    immediate win, otherwise the negamax score at the given depth.
    """
    other = {1: -1, -1: 1}
    grid = np.copy(board)
    grid = grid.reshape((6, 7))
    moves = cccc.available_moves(grid)
    random.shuffle(moves)

    # Only one legal column: play it and score win vs. neutral.
    if len(moves) == 1:
        c = moves[0]
        grid[np.where(grid[:, c] == 0)[0][-1], c] = active_turn
        return (1 if cccc.winner(grid) else 0, c + 1)

    # Search horizon: rank columns by the static evaluation only.
    if depth == 0:
        top_score = -2
        for c in moves:
            r = np.where(grid[:, c] == 0)[0][-1]
            grid[r, c] = active_turn
            s = evaluation(grid * active_turn)
            if s > top_score:
                top_score = s
                pick = c + 1
            grid[r, c] = 0
        return (top_score, pick)

    top_score = -2
    pick = None
    for c in moves:
        r = np.where(grid[:, c] == 0)[0][-1]
        grid[r, c] = active_turn
        if cccc.winner(grid):
            return (1, c + 1)
        reply_value, _ = alpha_beta_move(grid, other[active_turn], depth - 1,
                                         evaluation, -top_score)
        if -reply_value > top_score:
            pick = c + 1
            top_score = -reply_value
        if -reply_value >= alpha:
            break  # prune: parent will not allow this line anyway
        grid[r, c] = 0
    return (top_score, pick)
def value_move(board, active_turn, output_fun, exploration=0):
    """Choose a move epsilon-greedily from a learned value function.

    Parameters
    ----------
    board : array reshapable to (1, 42); the current position.
    active_turn : +1 or -1, the side to move.
    output_fun : callable scoring a (1, 42) board array.
    exploration : probability of playing a random legal move instead of
        the greedy one (epsilon).

    Returns the chosen move as a 1-based index.
    """
    # Fix: dropped the unused `X_sym = theano.tensor.matrix()` and
    # `y_sym = theano.tensor.ivector()` allocations — dead code that made
    # this function depend on theano for no reason.
    board = board.reshape((1, 42))
    # Normalize so the side to move is always represented by +1 stones.
    dummy_board = active_turn * board[:]
    options = cccc.available_moves(dummy_board)
    if exploration > random.random():
        # Explore: uniformly random legal move.
        move = random.choice(options)
    else:
        # Exploit: score each legal move and take the best.
        move_values = np.zeros(42)
        for move in options:
            dummy_board = active_turn * board[:]
            dummy_board[0][move] = 1
            # Evaluate the resulting position from the opponent's
            # perspective (-1 * board), then negate back to ours.
            move_values[move] = -1 * output_fun(-1 * dummy_board)
        available_move_values = np.array([move_values[move] for move in options])
        move = options[available_move_values.argmax(-1)]
    return move + 1
def alpha_beta_move(board, turn, depth = 0, alpha = (-inf,-inf), beta = (inf,inf), evaluation = lambda x: 0): dummy_board = np.copy(board) # we don't want to change the board state swap_player = {1:-1,-1:1} # So we can change whose turn options = cccc.available_moves(board) # get legal moves random.shuffle(options) # should inherit move order instead of randomizing # if len(options) == 1: # update_move(board,options[0]) # if cccc.winner(dummy_board): # return (inf,options[0]) # else: # return (0,options[0]) best_value = (-inf,-inf) if not options: print board, cccc.game_over(board) print 'oops, no available moves' cand_move = options[0] if depth == 0: for x in options: update_move(dummy_board,x,turn) op_value = (evaluation(dummy_board*swap_player[turn]) , depth) if tuple(-1 * el for el in op_value) > best_value: cand_move = x best_value = tuple(-1 * el for el in op_value) alpha = max(alpha, best_value) # print depth,-op_value, best_value, cand_move,alpha,beta if alpha >= beta: # print 'pruned' break #alpha-beta cutoff unupdate_move(dummy_board,x) else: for x in options: # dummy_board = np.copy(board) # height= np.where(board[:,x]==0)[0][-1] #connect four only # dummy_board[height, x] = turn update_move(dummy_board,x,turn) if cccc.winner(dummy_board): #should check over and tied too return((inf,depth), x) if cccc.is_full(dummy_board): #This assumes you can't lose on your turn return((0,depth) , x) op_value,_ = alpha_beta_move( dummy_board, swap_player[turn], depth-1, alpha = tuple(-1 * el for el in beta), beta = tuple(-1 * el for el in alpha), evaluation = evaluation) if tuple(-1 * el for el in op_value) > best_value: cand_move = x best_value = tuple(-1 * el for el in op_value) alpha = max(alpha, best_value) # print depth,-op_value, best_value, cand_move,alpha,beta if alpha >= beta: # print 'pruned' break #alpha-beta cutoff unupdate_move(dummy_board,x) # dummy_board[height, x] = 0 return (best_value, cand_move)