def alpha_beta_move(board, active_turn, depth, alpha=2):
    """Pick a move for ``active_turn`` ('X' or 'O') with a depth-limited search.

    The board is copied into a 9-element integer scratch array; 'X' is written
    as 1 and 'O' as -1.  Squares are tried in random order.

    Returns:
        A ``(value, move)`` tuple.  ``move`` is 1-based (square index + 1).
        ``value`` is 1 when the move wins immediately, 0 for a forced
        non-winning last move or when ``depth`` is exhausted (the move is
        then random), and otherwise the negated value of the opponent's
        best reply.
    """
    opponent_of = {'X': 'O', 'O': 'X'}
    scratch = np.arange(9)
    scratch[:] = board[:]
    moves = ttt.available_moves(board)
    random.shuffle(moves)
    marks = {'X': 1, 'O': -1}

    # Only one square left: play it and report whether it wins.
    if len(moves) == 1:
        only = moves[0]
        scratch[only] = marks[active_turn]
        return (1 if ttt.winner(scratch) else 0, only + 1)

    # Search horizon reached: fall back to a random legal move.
    if depth == 0:
        return (0, moves[np.random.randint(len(moves))] + 1)

    best_value, best_move = -2, None
    for square in moves:
        scratch[square] = marks[active_turn]
        if ttt.winner(scratch):
            return (1, square + 1)
        reply_value, _reply_move = alpha_beta_move(
            scratch, opponent_of[active_turn], depth - 1, -best_value)
        score = -reply_value
        if score > best_value:
            best_move = square + 1
            best_value = score
        # Cutoff: this line is already at least as good as the bound.
        if score >= alpha:
            break
        scratch[square] = board[square]  # undo the trial move
    return (best_value, best_move)
def play_ttt(): player_index_dict = {-1:1,1:0} player = int(request.form.get("player")) depth = int(request.form.get("depth")) board = request.form.get("board") board = board.split(",") # print board board = [int(x) for x in board] board = np.array(board) types = str(request.form.get("types")) types = types.split(",") print "The contorl methods are" , types # print types, board, player, ttt.game_over(np.copy(board)) print "the board is ", board print "the current player is ", player if types[player_index_dict[player]] == 'remote' and not ttt.game_over(np.copy(board)): move = ft3.alpha_beta_move(board, -1, depth = depth)[1] print "the move is ",move board[move-1] = player print "the new board is ", board player *= -1 if ttt.game_over(np.copy(board)): if ttt.winner(board)==1: return render_template('tic_tac_toe.html', board = list(board), player = player, types = types,depth = depth, finished = 1) if ttt.winner(board) ==0: return render_template('tic_tac_toe.html', board = list(board), player = player, types = types,depth = depth, finished = 0) if ttt.winner(board) == -1: return render_template('tic_tac_toe.html', board = list(board), player = player, types = types,depth = depth, finished = -1) else: print 'render next move' return render_template('tic_tac_toe.html',board = list(board), player = player, types = types,depth = depth, finished = -2)
def play_ttt(): player_index_dict = {-1: 1, 1: 0} player = int(request.form.get("player")) depth = int(request.form.get("depth")) board = request.form.get("board") board = board.split(",") # print board board = [int(x) for x in board] board = np.array(board) types = str(request.form.get("types")) types = types.split(",") print "The contorl methods are", types # print types, board, player, ttt.game_over(np.copy(board)) print "the board is ", board print "the current player is ", player if types[player_index_dict[player]] == 'remote' and not ttt.game_over( np.copy(board)): move = ft3.alpha_beta_move(board, -1, depth=depth)[1] print "the move is ", move board[move - 1] = player print "the new board is ", board player *= -1 if ttt.game_over(np.copy(board)): if ttt.winner(board) == 1: return render_template('tic_tac_toe.html', board=list(board), player=player, types=types, depth=depth, finished=1) if ttt.winner(board) == 0: return render_template('tic_tac_toe.html', board=list(board), player=player, types=types, depth=depth, finished=0) if ttt.winner(board) == -1: return render_template('tic_tac_toe.html', board=list(board), player=player, types=types, depth=depth, finished=-1) else: print 'render next move' return render_template('tic_tac_toe.html', board=list(board), player=player, types=types, depth=depth, finished=-2)
def start():
    """Run the interactive game loop until somebody wins or the game is drawn.

    The game type is asked for once.  On turn 2 of an "Easy" or "Medium"
    game the matching AI supplies the coordinates; in every other case they
    come from ``take_input``.  After each piece the board is printed and the
    result of ``ttt.winner()`` decides whether the global ``RUNNING`` flag is
    cleared.
    """
    global RUNNING
    game_type = get_game_type()
    print_board()
    while RUNNING:
        ai_to_move = ttt.TURN == 2
        if ai_to_move and game_type == "Easy":
            print()
            coords = ttt_AI.easy_input(ttt.BOARD)
        elif ai_to_move and game_type == "Medium":
            print()
            coords = ttt_AI.medium_input(ttt.BOARD)
        else:
            coords = take_input()

        ttt.add_piece(coords)
        victor = ttt.winner()
        print_board()

        # No result yet: keep playing.
        if victor is None:
            continue

        RUNNING = False
        if victor == "DRAW":
            print(victor)
        else:
            print(victor, " is the winner!")
def play_ttt():
    """JSON endpoint: report the game status or play one engine move.

    Form fields read: ``player``, ``depths`` (comma-separated ints),
    ``board`` (comma-separated ints, reshaped to 9 cells), ``types`` and
    ``evals`` (comma-separated strings) and ``overcheck``.

    With ``overcheck == "true"`` only the status is returned: ``finished``
    (``ttt.winner`` result, or -2 while the game is running) plus the winning
    squares as ``y``/``x`` lists.  Otherwise, if the side to move is
    ``'remote'`` and the game is not over, the alpha-beta search picks a move
    which is applied to the board, and the response carries ``move``,
    ``player`` and ``finished`` (plus ``y``/``x`` when the game is over).
    """
    # Player 1 uses index 0 of the per-player lists, player -1 uses index 1.
    player_to_index = {-1:1,1:0}
    player = int(request.form.get("player"))
    # The results of map() are indexed / truth-tested below, i.e. they are
    # used as lists (Python 2 semantics, consistent with the print statements).
    depths = map(int,request.form.get("depths").split(','))
    board = np.array([int(x) for x in request.form.get("board").split(',')]).reshape(9)
    # Strip double quotes from each control-method name.
    types = map(lambda x: x.replace("\"",""), request.form.get("types").split(","))
    evals = request.form.get("evals").split(",")
    overcheck = request.form.get("overcheck") == "true"
    if overcheck:
        # Status-only request: no move is made.
        if ttt.game_over(board):
            finished = ttt.winner(board.reshape(9))
        else:
            finished = -2
        # Transpose the winning squares into two parallel lists.
        winners = map(list,zip(*ft3.winning_squares(board)))
        if not winners:
            winners = [[],[]]
        print winners
        return jsonify(finished = finished, y=winners[0],x=winners[1])
    # Choose the position evaluator for the side to move.
    if evals[player_to_index[player]] == 'nn':
        #evaluation = lambda x : 0
        evaluation = ft3.sym_net_value
    else:
        evaluation = lambda x: 0
    if types[player_to_index[player]] == 'remote' and not ft3.game_over(np.copy(board)):
        move = ft3.alpha_beta_move(board.reshape(9), player, depth = depths[player_to_index[player]], evaluation = evaluation)[1]
        ft3.update_move(board,move,player)
        print "the board is "
        print board.reshape((3,3))
        player *= -1
    # NOTE(review): `move` is only assigned in the branch above; if that
    # branch is skipped the jsonify calls below would raise NameError.
    # Confirm callers only post here when the engine is due to move.
    # NOTE(review): `player` is flipped above and negated again below, so
    # after an engine move the response carries the side that just moved.
    if ft3.game_over(np.copy(board)):
        finished = ttt.winner(board)
        winners = map(list,zip(*ft3.winning_squares(board)))
        if not winners:
            winners = [[],[]]
        print winners
        return jsonify(move=move, player = -1*player, finished = finished, y=winners[0],x=winners[1])
    else:
        finished = -2
        winners = []
        return jsonify(move=move, player = -1*player, finished = finished)
def get_max_future(future_board, value_fun):
    """Find the reply by player -1 with the lowest score.

    Each available square is tried with a -1 on a copy of ``future_board``
    reshaped to ``(1, 9)``.  The position is scored with ``ttt.winner`` when
    that is truthy and with ``value_fun`` otherwise.

    Returns:
        ``(lowest_score, board_after_move)`` where the second item is a copy
        of ``future_board`` with -1 placed on the lowest-scoring square.
    """
    legal = ttt.available_moves(future_board)
    scores = np.zeros(9)
    for square in legal:
        trial = np.copy(future_board)
        trial[square] = -1
        trial = trial.reshape(1, 9)
        # A decided game overrides the learned/heuristic valuation.
        scores[square] = ttt.winner(trial) or value_fun(trial)

    legal_scores = np.array([scores[square] for square in legal])
    chosen = legal[np.argmin(legal_scores)]

    result_board = np.copy(future_board)
    result_board[chosen] = -1
    return legal_scores.min(), result_board
def alpha_beta_move(board, turn, depth = 0, alpha = (-inf,-inf), beta = (inf,inf), evaluation = lambda x: 0): dummy_board = np.copy(board).reshape(9) # we don't want to change the board state swap_player = {1:-1,-1:1} # So we can change whose turn options = ttt.available_moves(board) # get legal moves random.shuffle(options) # should inherit move order instead of randomizing best_value = (-inf,-inf) if not options: print board, cccc.game_over(board) print 'oops, no available moves' cand_move = options[0] if depth == 0: for x in options: update_move(dummy_board,x,turn) op_value = (evaluation(dummy_board*swap_player[turn]) , depth) if tuple(-1 * el for el in op_value) > best_value: cand_move = x best_value = tuple(-1 * el for el in op_value) alpha = max(alpha, best_value) if alpha >= beta: break #alpha-beta cutoff unupdate_move(dummy_board,x) else: for x in options: update_move(dummy_board,x,turn) if ttt.winner(dummy_board): #should check over and tied too return((inf,depth), x) if ttt.is_full(dummy_board): #This assumes you can't lose on your turn return((0,depth) , x) op_value,_ = alpha_beta_move( dummy_board, swap_player[turn], depth-1, alpha = tuple(-1 * el for el in beta), beta = tuple(-1 * el for el in alpha), evaluation = evaluation) if tuple(-1 * el for el in op_value) > best_value: cand_move = x best_value = tuple(-1 * el for el in op_value) alpha = max(alpha, best_value) # print depth,-op_value, best_value, cand_move,alpha,beta if alpha >= beta: # print 'pruned' break #alpha-beta cutoff unupdate_move(dummy_board,x) # dummy_board[height, x] = 0 return (best_value, cand_move)
def monte_carlo(board,epsilon = 0.5,duration = 1,player=1):
    """Estimate the value of ``board`` by time-bounded random playouts.

    Playouts start from ``board`` with ``player`` to move and are repeated
    until ``duration`` (measured with ``time.clock``) has elapsed.  ``plays``
    and ``results`` are dictionaries keyed by ``tuple(board)``.

    Returns:
        ``results[root] / plays[root]`` for the starting board.
    """
    plays = {}
    results = {}
    # NOTE(review): time.clock() was removed in Python 3.8; fine on Python 2.
    t0 = time.clock()
    plays[tuple(board)] = 0
    results[tuple(board)]=0
    while time.clock()-t0 < duration:
        current_player = player
        dummy_board = np.copy(board)
        # Every position visited in this playout, with the player stored next to it.
        branch = [(np.copy(dummy_board),current_player)]
        while not game_over(dummy_board):
            options = ttt.available_moves(dummy_board)
            future_boards = [next_board(dummy_board,move,current_player) for move in options]
            # Statistics are only used once every successor has a non-zero play count.
            if all(plays.get(tuple(b)) for b in future_boards):
                # Random successor when the draw exceeds epsilon, otherwise the
                # successor with the lowest average result.
                if random.random() > epsilon:
                    dummy_board = random.choice(future_boards)
                else:
                    #min here because you are maximizing over future boards, which the results are given in terms of the
                    #current player, i.e. the other player.
                    dummy_board = min(future_boards,key = lambda x : results[tuple(x)] / float(plays[tuple(x)]))
            else:
                dummy_board = random.choice(future_boards)
                # NOTE(review): this zeroes the counters of the chosen board
                # even if it already had statistics - confirm that is intended.
                plays[tuple(dummy_board)] = 0
                results[tuple(dummy_board)]=0
            current_player *= -1
            branch.append((np.copy(dummy_board),current_player))
        # Back up the final outcome, signed by the player stored with each position.
        for b,p in branch:
            plays[tuple(b)] +=1
            results[tuple(b)] += p * ttt.winner(dummy_board)
    # NOTE(review): if no playout completes (e.g. duration <= 0) the play
    # count is still 0 and this division fails.
    return results[tuple(board)] / float(plays[tuple(board)])
def mc_step(branch,results,epsilon, cutoff = 10000): dummy_board = np.copy(branch[-1]) #To help convergence we will randomly drop stored values #if random.random() < 1/float(cutoff): # results[tuple(dummy_board)] = {'result':0,'plays':0} if not results.get(tuple(dummy_board)): results[tuple(dummy_board)] = {'result':0,'plays':0} board_plays = results[tuple(dummy_board)]['plays'] board_result = results[tuple(dummy_board)]['result'] if game_over(dummy_board): result = ttt.winner(dummy_board) elif board_plays> cutoff: result = results[tuple(dummy_board)]['result'] / float(results[tuple(dummy_board)]['plays']) else: options = ttt.available_moves(dummy_board) future_boards = [next_board(dummy_board,move,1) for move in options] if all(results.get(tuple(-1 * b)) for b in future_boards): if epsilon(board_plays) > random.random(): dummy_board = random.choice(future_boards) else: dummy_board = min(future_boards,key = lambda x : results[tuple(-1 * x)]['result'] / float(results[tuple(-1 * x)]['plays'])) else: dummy_board = random.choice(future_boards) branch.append(-1 * np.copy(dummy_board)) result , _ = mc_step(branch,results,epsilon,cutoff) result = -1 * result return result , branch
def go(): if request.method == 'GET': board = app.board player = app.player print "HOWDY" return render_template('tic_tac_toe.html', board = board, cplayer = player, finished = -2 ) if request.method == 'POST': player = int(request.form.get("player")) board = request.form.get("board") board = board.split(",") board = [int(x) for x in board] board = np.array(board) print board,player if game_over(np.copy(board)): if ttt.winner(board)==1: return render_template('tic_tac_toe.html', board = list(board), cplayer = player, finished = 1) if ttt.winner(board) ==0: return render_template('tic_tac_toe.html', board = list(board), cplayer = player, finished = 0) if ttt.winner(board) == -1: return render_template('tic_tac_toe.html', board = list(board), cplayer = player, finished = -1) while not game_over(board): print "HELLO" if player == -1: _,move = alpha_beta_move(board,player,depth=6) print move board[move-1] = player player = -1*player print board,player elif player == 1: print board, player print 'pleaaaaaaaaase render!!!' return render_template('tic_tac_toe.html', board = list(board), cplayer = player,finished=-2) print 'you shouldnt see this' if game_over(np.copy(board)): if ttt.winner(board)==1: return render_template('tic_tac_toe.html', board = list(board), cplayer = player, finished = 1) if ttt.winner(board) ==0: return render_template('tic_tac_toe.html', board = list(board), cplayer = player, finished = 0) if ttt.winner(board) == -1: return render_template('tic_tac_toe.html', board = list(board), cplayer = player, finished = -1)
def print_winner(board):
    """Print "<winner> Wins!" using the result of ``tic_tac_toe.winner``."""
    victor = tic_tac_toe.winner(board)
    announcement = '{} Wins!'.format(victor)
    print(announcement)
def game_over(board):
    """Return a truthy value when the game has a winner or the board is full.

    The winner value itself is returned when it is truthy; otherwise the
    result of ``ttt.is_full(board)`` is returned.
    """
    outcome = ttt.winner(board)
    if outcome:
        return outcome
    return ttt.is_full(board)
def random_game(board, turn):
    """Play random moves from ``board`` until the game ends.

    Works on a copy of ``board``; ``turn`` alternates sign after every move.

    Returns:
        ``ttt.winner`` of the final position.
    """
    position = np.copy(board)
    while True:
        # NOTE(review): the loop tests ttt.is_winner while the result below
        # comes from ttt.winner - confirm both exist in ttt.
        if ttt.is_winner(position) or ttt.is_full(position):
            break
        position = random_move(position, turn)
        turn = -1 * turn
    return ttt.winner(position)
# Append the latest batch of output states and moves to the running histories.
output_history = output_history+output_list
move_history = move_history+move_list
#reward_history = reward_history + reward_list
# for _ in range(len(input_list)):
#     reward_history.append(reward)
# Targets are only built once more than 2*minimum_data inputs are stored.
if len(input_history) > 2*minimum_data:
    # One target value per stored output state.
    target_history = np.zeros(len(output_history))
    print 'Creating Targets for {} data points'.format(len(output_history))
    print '\n'
    # NOTE(review): time.clock() was removed in Python 3.8; fine on Python 2.
    t3 = time.clock()
    for i,item in enumerate(output_history):
        output_state = np.copy(output_history[i])
        if ttt.winner(output_state) or ttt.is_full(output_state):
            # Finished game: the target is the game outcome itself.
            target_history[i] = ttt.winner(output_state)
        else:
            #minus because the future term is in terms of the valuation for the player, and we need a target for the
            #opponent
            # targets[i] = (1-future_discount) * reward_state + future_discount * get_max_future(
            #output_state,value_fun)
            #targets = np.array(targets).reshape(BATCH_SIZE,1)
            #temporal difference method
            # Unfinished game: start from 0 and set up the state used by the
            # code that follows this excerpt.
            target_history[i]= 0
            current_state = np.copy(output_state)
            depth = 0
            player = 1
# Manual test of tic_tac_toe.legal_moves: print the legal moves of two boards.
print("Тест функции tic_tac_toe.legal_moves")
legalMovesList = tic_tac_toe.legal_moves(BOARD_ONE)
print("Легальные ходы: " + str(legalMovesList))
legalMovesList = tic_tac_toe.legal_moves(BOARD_TWO)
print("Легальные ходы 2: " + str(legalMovesList))
# Calls with unusual inputs (None, a one-cell board, an oversized board);
# the results are not printed - presumably these only check that the
# function copes with such input.
legalMovesList = tic_tac_toe.legal_moves(None)
legalMovesList = tic_tac_toe.legal_moves([tic_tac_toe.CROSS])
legalMovesList = tic_tac_toe.legal_moves([ "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "" ])
if TEST_WINNING == 1:
    # Manual test of tic_tac_toe.winner: show each board, then its result.
    print("Тест функции tic_tac_toe.winner")
    tic_tac_toe.display_board(BOARD_ONE)
    isWinning1 = tic_tac_toe.winner(BOARD_ONE)
    # NOTE(review): string concatenation requires winner() to return a str
    # here; only the BOARD_TWO case below substitutes a text for a falsy result.
    print("Выигрыш: " + isWinning1)
    tic_tac_toe.display_board(BOARD_TWO)
    isWinning2 = tic_tac_toe.winner(BOARD_TWO)
    if not isWinning2:
        # Falsy result: replace it with a "no win" message before printing.
        isWinning2 = "Нет выигрыша"
    print("Выигрыш: " + isWinning2)
    tic_tac_toe.display_board(BOARD_THREE)
    isWinning2 = tic_tac_toe.winner(BOARD_THREE)
    print("Выигрыш: " + isWinning2)
    tic_tac_toe.display_board(BOARD_FOUR)
    isWinning2 = tic_tac_toe.winner(BOARD_FOUR)
    print("Выигрыш: " + isWinning2)
def go(): if request.method == 'GET': board = app.board player = app.player print "HOWDY" return render_template('tic_tac_toe.html', board=board, cplayer=player, finished=-2) if request.method == 'POST': player = int(request.form.get("player")) board = request.form.get("board") board = board.split(",") board = [int(x) for x in board] board = np.array(board) print board, player if game_over(np.copy(board)): if ttt.winner(board) == 1: return render_template('tic_tac_toe.html', board=list(board), cplayer=player, finished=1) if ttt.winner(board) == 0: return render_template('tic_tac_toe.html', board=list(board), cplayer=player, finished=0) if ttt.winner(board) == -1: return render_template('tic_tac_toe.html', board=list(board), cplayer=player, finished=-1) while not game_over(board): print "HELLO" if player == -1: _, move = alpha_beta_move(board, player, depth=6) print move board[move - 1] = player player = -1 * player print board, player elif player == 1: print board, player print 'pleaaaaaaaaase render!!!' return render_template('tic_tac_toe.html', board=list(board), cplayer=player, finished=-2) print 'you shouldnt see this' if game_over(np.copy(board)): if ttt.winner(board) == 1: return render_template('tic_tac_toe.html', board=list(board), cplayer=player, finished=1) if ttt.winner(board) == 0: return render_template('tic_tac_toe.html', board=list(board), cplayer=player, finished=0) if ttt.winner(board) == -1: return render_template('tic_tac_toe.html', board=list(board), cplayer=player, finished=-1)