def alpha_beta_move(board, active_turn, depth, alpha=2):
    """Negamax search with alpha-beta pruning for connect four.

    Parameters:
        board       -- 6x7 array-like position, entries in {-1, 0, 1}
        active_turn -- 1 or -1, the player to move
        depth       -- remaining search depth; at 0 a random legal move
                       is returned with value 0 (no leaf evaluation here)
        alpha       -- pruning bound (default 2 means "no bound", since
                       values lie in [-1, 1])

    Returns (value, move): value from active_turn's point of view
    (1 win, 0 unknown/draw, -1 loss); move is a 1-based column index.
    """
    swap_dict = {1: -1, -1: 1}
    # np.copy preserves the board's dtype; the previous
    # np.zeros((6,7)) scratch board silently promoted integer boards
    # to float64, unlike the other alpha_beta_move variants in this file.
    dummy_board = np.copy(board)
    options = cccc.available_moves(board)
    random.shuffle(options)  # break ties randomly among equal moves
    if len(options) == 1:
        # Forced move: play it and report win/unknown directly.
        dummy_board[np.where(dummy_board[:, options[0]] == 0)[0][-1], options[0]] = active_turn
        if cccc.winner(dummy_board):
            return (1, options[0] + 1)
        else:
            return (0, options[0] + 1)
    if depth == 0:
        # Horizon with no evaluation function: any legal move will do.
        return (0, options[np.random.randint(len(options))] + 1)
    best_value = -2  # sentinel below the minimum possible score of -1
    candidate_move = None
    for x in options:
        height = np.where(dummy_board[:, x] == 0)[0][-1]  # lowest empty row
        dummy_board[height, x] = active_turn
        if cccc.winner(dummy_board):
            return (1, x + 1)
        (opp_value, opp_move) = alpha_beta_move(dummy_board, swap_dict[active_turn], depth - 1, -best_value)
        if -opp_value > best_value:
            candidate_move = x + 1
            best_value = -opp_value
        if -opp_value >= alpha:
            # Alpha-beta cutoff: the opponent already has a better line.
            break
        dummy_board[height, x] = 0  # undo the trial move
    return (best_value, candidate_move)
def alpha_beta_move(board, active_turn, depth, evaluation=lambda x: 0, alpha=2):
    """Negamax alpha-beta search with a pluggable leaf evaluation.

    Parameters:
        board       -- position, reshaped internally to 6x7
        active_turn -- 1 or -1, the player to move
        depth       -- remaining plies; at 0 every legal move is scored
                       with evaluation(board * active_turn)
        evaluation  -- leaf scorer seeing the board from the mover's
                       perspective (default scores everything 0)
        alpha       -- pruning bound (2 means no bound; scores are small)

    Returns (value, move) with move as a 1-based column index.
    """
    swap_dict = {1: -1, -1: 1}
    dummy_board = np.copy(board)
    dummy_board = dummy_board.reshape((6, 7))
    options = cccc.available_moves(dummy_board)
    random.shuffle(options)  # randomize tie-breaking between equal moves
    if len(options) == 1:
        # Forced move: play it and report win/unknown directly.
        dummy_board[np.where(dummy_board[:, options[0]] == 0)[0][-1], options[0]] = active_turn
        if cccc.winner(dummy_board):
            return (1, options[0] + 1)
        else:
            return (0, options[0] + 1)
    if depth == 0:
        # Horizon: score each candidate with the supplied evaluation.
        best_value = -2
        # Pre-seed with a legal move so we always return one even if
        # evaluation never beats the -2 sentinel (e.g. returns NaN);
        # previously candidate_move could be unbound here and raise
        # UnboundLocalError.
        candidate_move = options[0] + 1
        for x in options:
            height = np.where(dummy_board[:, x] == 0)[0][-1]
            dummy_board[height, x] = active_turn
            # Multiply by active_turn so the evaluator always sees the
            # position from the mover's point of view.
            eval_board = evaluation(dummy_board * active_turn)
            if eval_board > best_value:
                best_value = eval_board
                candidate_move = x + 1
            dummy_board[height, x] = 0  # undo the trial move
        return (best_value, candidate_move)
    best_value = -2
    candidate_move = None
    for x in options:
        height = np.where(dummy_board[:, x] == 0)[0][-1]
        dummy_board[height, x] = active_turn
        if cccc.winner(dummy_board):
            return (1, x + 1)
        (opp_value, opp_move) = alpha_beta_move(dummy_board, swap_dict[active_turn], depth - 1, evaluation, -best_value)
        if -opp_value > best_value:
            candidate_move = x + 1
            best_value = -opp_value
        if -opp_value >= alpha:
            # Alpha-beta cutoff.
            break
        dummy_board[height, x] = 0
    return (best_value, candidate_move)
def play_cccc():
    """Flask handler: read the game state from the POSTed form, let the
    engine move when this side is controlled remotely, and return the
    move, next player and finished flag as JSON.
    """
    print "requesting move"
    # Map a player token (1 or -1) to its index in the per-player
    # depths/types/evals lists sent by the client.
    player_index_dict = {-1: 1, 1: 0}
    player = int(request.form.get("player"))
    depths = map(int, request.form.get("depths").split(','))
    print "the depths are ", depths
    # Board arrives as a comma-separated string of 42 cell values.
    board = request.form.get("board")
    board = board.split(",")
    board = [int(x) for x in board]
    board = np.array(board).reshape((6, 7))
    print "the board is "
    print board
    types = request.form.get("types")
    # Strip the JSON-style quoting around each control-type token.
    types = map(lambda x: x.replace("\"", ""), types.split(","))
    evals = request.form.get("evals").split(",")
    print "the eval method is ", evals[player_index_dict[player]]
    if fc4.game_over(np.copy(board).reshape((6, 7))):
        # Game already finished: report the result without moving.
        finished = cccc.winner(board.reshape((6, 7)))
        return jsonify(finished=finished)
    else:
        finished = -2  # sentinel for "game still in progress"
    if evals[player_index_dict[player]] == 'nn':
        evaluation = fc4.net_value
    else:
        evaluation = lambda x: 0  # trivial evaluation
    print "the control methods are ", types
    # print types, player, fc4.game_over(np.copy(board)), evals, depths[player_index_dict[player]]
    if types[player_index_dict[player]] == 'remote' and not fc4.game_over(np.copy(board)):
        # Engine-controlled side: pick a move via alpha-beta search.
        move = fc4.alpha_beta_move(board.reshape((6, 7)), player,
                                   depth=depths[player_index_dict[player]],
                                   evaluation=evaluation)[1]
        print "the next move is ", move
        fc4.update_move(board, move, player)
        print "the board is "
        print board
        player *= -1
        board = board.reshape(42)
        print 'next move is', move
    # NOTE(review): if the 'remote' branch above did not run, `move` is
    # unbound here and the jsonify below raises NameError; the later
    # variant of this handler guards with `move = 0` — confirm intent.
    if fc4.game_over(np.copy(board).reshape((6, 7))):
        finished = cccc.winner(board.reshape((6, 7)))
    else:
        finished = -2
    # player was already flipped after moving, so -1*player restores the
    # side that just moved for the client.
    return jsonify(move=move, player=-1 * player, finished=finished)
def play_cccc():
    """Flask handler: parse the POSTed board state, have the engine move
    when this side's control type is 'remote', and respond with JSON.

    A token-identical duplicate of this definition also appears in the
    file; at import time the later definition wins.
    """
    print "requesting move"
    # Player token (1 / -1) -> index into the per-player settings lists.
    player_index_dict = {-1:1,1:0}
    player = int(request.form.get("player"))
    depths = map(int,request.form.get("depths").split(','))
    print "the depths are ", depths
    # The board is sent as 42 comma-separated integers.
    board = request.form.get("board")
    board = board.split(",")
    board = [int(x) for x in board]
    board = np.array(board).reshape((6,7))
    print "the board is "
    print board
    types = request.form.get("types")
    # Remove the embedded quote characters around each type token.
    types = map(lambda x: x.replace("\"",""),types.split(","))
    evals = request.form.get("evals").split(",")
    print "the eval method is ",evals[player_index_dict[player]]
    if fc4.game_over(np.copy(board).reshape((6,7))):
        # Nothing to do: game is already decided.
        finished = cccc.winner(board.reshape((6,7)))
        return jsonify(finished = finished)
    else:
        finished = -2  # -2 signals "still playing" to the client
    if evals[player_index_dict[player]] == 'nn':
        evaluation = fc4.net_value
    else:
        evaluation = lambda x: 0  # constant-zero evaluation
    print "the control methods are ", types
    # print types, player, fc4.game_over(np.copy(board)), evals, depths[player_index_dict[player]]
    if types[player_index_dict[player]] == 'remote' and not fc4.game_over(np.copy(board)):
        # Remote (engine) side to move: search for the best column.
        move = fc4.alpha_beta_move(board.reshape((6,7)), player,
                                   depth = depths[player_index_dict[player]],
                                   evaluation = evaluation)[1]
        print "the next move is ",move
        fc4.update_move(board,move,player)
        print "the board is "
        print board
        player *= -1
        board = board.reshape(42)
        print 'next move is', move
    # NOTE(review): `move` is unbound if the branch above was skipped,
    # so the final jsonify would raise NameError; another variant of
    # this handler initializes `move = 0` up front — confirm.
    if fc4.game_over(np.copy(board).reshape((6,7))):
        finished = cccc.winner(board.reshape((6,7)))
    else:
        finished = -2
    return jsonify(move=move, player = -1*player, finished = finished)
def get_max_future(future_board, value_fun):
    """Play the opponent's (-1) best reply according to value_fun.

    Every legal move for player -1 is scored: a finished board scores
    cccc.winner(...) directly, otherwise value_fun is applied to a
    flattened (1, 42) copy of the board.  The opponent minimizes, so the
    move with the smallest score is selected.

    Parameters:
        future_board -- 6x7 position after our move (not mutated)
        value_fun    -- callable scoring a (1, 42) board array

    Returns (min_score, resulting_board): the chosen score and a copy of
    future_board with the opponent's stone dropped in.

    Cleanup vs the original: removed the redundant pre-loop board copy
    (it was immediately overwritten inside the loop) and the dead
    commented-out reshape line; behavior is unchanged.
    """
    options = cccc.available_moves(future_board)
    move_values = np.zeros(7)  # one slot per column; unused columns stay 0
    for move in options:
        trial = np.copy(future_board)
        # Drop the opponent's stone into the lowest empty cell.
        trial[np.where(trial[:, move] == 0)[0][-1], move] = -1
        if cccc.winner(trial):
            move_values[move] = cccc.winner(trial)
        else:
            # value_fun expects a flat (1, 42) board.
            move_values[move] = value_fun(np.copy(trial).reshape(1, 42))
    available_move_values = np.array([move_values[move] for move in options])
    # Opponent picks the move that is worst for us (minimum score).
    best = options[np.argmin(available_move_values)]
    result = np.copy(future_board)
    result[np.where(result[:, best] == 0)[0][-1], best] = -1
    return np.amin(available_move_values), result
def alpha_beta_move(board, active_turn, depth, evaluation=lambda x: 0, alpha=2):
    """Negamax search with alpha-beta pruning and a leaf evaluator.

    board is copied and reshaped to 6x7; active_turn is 1 or -1.  At
    depth 0 each legal move is scored by evaluation(board * active_turn)
    (the mover's perspective).  Returns a (value, column) pair where
    column is 1-based.
    """
    swap = {1: -1, -1: 1}
    scratch = np.copy(board).reshape((6, 7))
    options = cccc.available_moves(scratch)
    random.shuffle(options)  # random tie-breaking between equal moves

    def drop(col, piece):
        # Place piece in the lowest empty cell of col; return that row.
        row = np.where(scratch[:, col] == 0)[0][-1]
        scratch[row, col] = piece
        return row

    if len(options) == 1:
        only = options[0]
        drop(only, active_turn)
        value = 1 if cccc.winner(scratch) else 0
        return (value, only + 1)

    if depth == 0:
        # Horizon: rank moves purely by the evaluation function.
        best_value = -2
        for col in options:
            row = drop(col, active_turn)
            score = evaluation(scratch * active_turn)
            if score > best_value:
                best_value = score
                candidate_move = col + 1
            scratch[row, col] = 0  # take the trial move back
        return (best_value, candidate_move)

    best_value, candidate_move = -2, None
    for col in options:
        row = drop(col, active_turn)
        if cccc.winner(scratch):
            return (1, col + 1)
        reply_value, _ = alpha_beta_move(scratch, swap[active_turn],
                                         depth - 1, evaluation, -best_value)
        if -reply_value > best_value:
            best_value, candidate_move = -reply_value, col + 1
        if -reply_value >= alpha:
            break  # cutoff: opponent already has a better line elsewhere
        scratch[row, col] = 0
    return (best_value, candidate_move)
def go():
    """Flask view for the connect-four page.

    GET renders the stored app board; POST parses the submitted board,
    reports a finished game, otherwise lets the engine (player -1) move
    until it is the human's (player 1) turn or the game ends.
    """
    if request.method == 'GET':
        board = app.board
        player = app.player
        print "HOWDY"
        return render_template('connect_four.html', board = board, cplayer = player, finished = -2 )
    if request.method == 'POST':
        player = int(request.form.get("player"))
        # Board arrives as 42 comma-separated integers.
        board = request.form.get("board")
        board = board.split(",")
        #print board
        board = [int(x) for x in board]
        board = np.array(board)
        #print board,player
        if game_over(np.copy(board)):
            # Report the final result (1 win / 0 draw / -1 loss).
            if cccc.winner(board.reshape((6,7)))==1:
                print '1'
                return render_template('connect_four.html', board = list(board), cplayer = player, finished = 1)
            if cccc.winner(board.reshape((6,7))) ==0:
                print '0'
                return render_template('connect_four.html', board = list(board), cplayer = player, finished = 0)
            if cccc.winner(board.reshape((6,7))) == -1:
                print '-1'
                return render_template('connect_four.html', board = list(board), cplayer = player, finished = -1)
        while not game_over(board):
            if player == -1:
                # Engine's turn: depth-2 search with the network value.
                _,move = alpha_beta_move(board,player,depth=2, evaluation = net_value)
                #print move
                #print game_over(board)
                board = board.reshape((6,7))
                # Drop the piece in the lowest empty row of the chosen
                # column.  NOTE(review): active_turn is assigned here
                # but never read afterwards.
                board[np.where(board[:,move-1]==0)[0][-1],move-1] = active_turn = player
                player = -1*player
                board = board.reshape(42)
                #print board,player
            elif player == 1:
                # Human's turn: hand control back to the browser.
                #print board, player
                return render_template('connect_four.html', board = list(board), cplayer = player,finished=-2)
        if game_over(np.copy(board)):
            if cccc.winner(board.reshape((6,7)))==1:
                print '1'
                return render_template('connect_four.html', board = list(board), cplayer = player, finished = 1)
            if cccc.winner(board.reshape((6,7))) ==0:
                print '0'
                return render_template('connect_four.html', board = list(board), cplayer = player, finished = 0)
            if cccc.winner(board.reshape((6,7))) == -1:
                print '-1'
                return render_template('connect_four.html', board = list(board), cplayer = player, finished = -1)
def play_cccc(): print "requesting move" player_index_dict = {-1:1,1:0} player = int(request.form.get("player")) depths = map(int,request.form.get("depths").split(',')) print "the depths are ", depths board = request.form.get("board") board = board.split(",") board = [int(x) for x in board] board = np.array(board).reshape((6,7)) move =0 print "the board is " print board types = request.form.get("types") types = map(lambda x: x.replace("\"",""),types.split(",")) evals = request.form.get("evals").split(",") print "the eval method is ",evals[player_index_dict[player]] try: print request.form.get("overcheck") overcheck = request.form.get("overcheck")=="true" print overcheck if overcheck: if cccc.game_over(board): finished = cccc.winner(board.reshape((6,7))) else: finished = -2 winners = map(list,zip(*fc4.winning_squares(board))) if not winners: winners = [[],[]] print winners return jsonify(finished = finished, y=winners[0],x=winners[1]) except: print "failed" if fc4.game_over(np.copy(board).reshape((6,7))): finished = cccc.winner(board.reshape((6,7))) winners = map(list,zip(*fc4.winning_squares(board))) if not winners: winners = [[],[]] print winners return jsonify(finished = finished, y=winners[0],x=winners[1]) else: winners = [] finished = -2 if evals[player_index_dict[player]] == 'nn': evaluation = fc4.sym_net_value else: evaluation = lambda x: 0 #evaluation print "the control methods are ", types # print types, player, fc4.game_over(np.copy(board)), evals, depths[player_index_dict[player]] if types[player_index_dict[player]] == 'remote' and not fc4.game_over(np.copy(board)): move = fc4.alpha_beta_move(board.reshape((6,7)), player, depth = depths[player_index_dict[player]], evaluation = evaluation)[1] print "the next move is ",move fc4.update_move(board,move,player) print "the board is " print board player *= -1 board = board.reshape(42) # print 'next move is', move if fc4.game_over(np.copy(board).reshape((6,7))): finished = cccc.winner(board.reshape((6,7))) 
winners = map(list,zip(*fc4.winning_squares(board))) if not winners: winners = [[],[]] print winners return jsonify(move=move, player = -1*player, finished = finished, y=winners[0],x=winners[1]) else: finished = -2 winners = [] return jsonify(move=move, player = -1*player, finished = finished)
def game_over(board):
    """Truthy when the position is finished: someone has won or the
    grid has no empty cells left.  Accepts flat or 6x7 input."""
    grid = np.array(board).reshape((6, 7))
    if cccc.winner(grid):
        return cccc.winner(grid)
    return cccc.is_full(grid)
def game_over(board):
    """Return a truthy value when play is finished (a winner exists or
    the board is full).  The input is reshaped to 6x7 first."""
    grid = np.reshape(np.array(board), (6, 7))
    result = cccc.winner(grid)
    if not result:
        result = cccc.is_full(grid)
    return result
def go():
    """Flask view for the connect-four page (duplicate definition of
    `go` also present in this file; the later one wins at import time).

    GET renders the app's stored board; POST parses the posted board,
    reports a finished game, otherwise plays engine moves (player -1)
    until the human (player 1) is to move or the game ends.
    """
    if request.method == 'GET':
        board = app.board
        player = app.player
        print "HOWDY"
        return render_template('connect_four.html', board=board, cplayer=player, finished=-2)
    if request.method == 'POST':
        player = int(request.form.get("player"))
        # The board is posted as 42 comma-separated integers.
        board = request.form.get("board")
        board = board.split(",")
        #print board
        board = [int(x) for x in board]
        board = np.array(board)
        #print board,player
        if game_over(np.copy(board)):
            # Game already decided: render the result immediately.
            if cccc.winner(board.reshape((6, 7))) == 1:
                print '1'
                return render_template('connect_four.html', board=list(board), cplayer=player, finished=1)
            if cccc.winner(board.reshape((6, 7))) == 0:
                print '0'
                return render_template('connect_four.html', board=list(board), cplayer=player, finished=0)
            if cccc.winner(board.reshape((6, 7))) == -1:
                print '-1'
                return render_template('connect_four.html', board=list(board), cplayer=player, finished=-1)
        while not game_over(board):
            if player == -1:
                # Engine move: depth-2 alpha-beta with the net evaluator.
                _, move = alpha_beta_move(board, player, depth=2, evaluation=net_value)
                #print move
                #print game_over(board)
                board = board.reshape((6, 7))
                # Drop into the lowest empty row of the chosen column.
                # NOTE(review): active_turn is assigned but never used.
                board[np.where(board[:, move - 1] == 0)[0][-1], move - 1] = active_turn = player
                player = -1 * player
                board = board.reshape(42)
                #print board,player
            elif player == 1:
                # Human to move: return control to the browser.
                #print board, player
                return render_template('connect_four.html', board=list(board), cplayer=player, finished=-2)
        if game_over(np.copy(board)):
            if cccc.winner(board.reshape((6, 7))) == 1:
                print '1'
                return render_template('connect_four.html', board=list(board), cplayer=player, finished=1)
            if cccc.winner(board.reshape((6, 7))) == 0:
                print '0'
                return render_template('connect_four.html', board=list(board), cplayer=player, finished=0)
            if cccc.winner(board.reshape((6, 7))) == -1:
                print '-1'
                return render_template('connect_four.html', board=list(board), cplayer=player, finished=-1)
def alpha_beta_move(board, turn, depth = 0, alpha = (-inf,-inf), beta = (inf,inf), evaluation = lambda x: 0): dummy_board = np.copy(board) # we don't want to change the board state swap_player = {1:-1,-1:1} # So we can change whose turn options = cccc.available_moves(board) # get legal moves random.shuffle(options) # should inherit move order instead of randomizing # if len(options) == 1: # update_move(board,options[0]) # if cccc.winner(dummy_board): # return (inf,options[0]) # else: # return (0,options[0]) best_value = (-inf,-inf) if not options: print board, cccc.game_over(board) print 'oops, no available moves' cand_move = options[0] if depth == 0: for x in options: update_move(dummy_board,x,turn) op_value = (evaluation(dummy_board*swap_player[turn]) , depth) if tuple(-1 * el for el in op_value) > best_value: cand_move = x best_value = tuple(-1 * el for el in op_value) alpha = max(alpha, best_value) # print depth,-op_value, best_value, cand_move,alpha,beta if alpha >= beta: # print 'pruned' break #alpha-beta cutoff unupdate_move(dummy_board,x) else: for x in options: # dummy_board = np.copy(board) # height= np.where(board[:,x]==0)[0][-1] #connect four only # dummy_board[height, x] = turn update_move(dummy_board,x,turn) if cccc.winner(dummy_board): #should check over and tied too return((inf,depth), x) if cccc.is_full(dummy_board): #This assumes you can't lose on your turn return((0,depth) , x) op_value,_ = alpha_beta_move( dummy_board, swap_player[turn], depth-1, alpha = tuple(-1 * el for el in beta), beta = tuple(-1 * el for el in alpha), evaluation = evaluation) if tuple(-1 * el for el in op_value) > best_value: cand_move = x best_value = tuple(-1 * el for el in op_value) alpha = max(alpha, best_value) # print depth,-op_value, best_value, cand_move,alpha,beta if alpha >= beta: # print 'pruned' break #alpha-beta cutoff unupdate_move(dummy_board,x) # dummy_board[height, x] = 0 return (best_value, cand_move)
def game_over(board):
    """Truthy when the game is finished: a winner exists or the board
    has no empty cells.  Unlike the reshaping variants of game_over in
    this file, the board is used as passed in."""
    verdict = cccc.winner(board)
    return verdict if verdict else cccc.is_full(board)
game_length = len(result_O.log) input_list = [-1*board_list[2*i+1] for i in range(game_length/2)] output_list = [-1*board_list[2*i+2] for i in range(game_length/2)] move_list = [result_O.log[2*i+1] for i in range(game_length/2)] for epoch in range(train_duration): t1 = time.clock() if len(input_history) > minimum_data: target_history = np.zeros(len(output_history)) print 'Creating Targets for {} data points'.format(len(output_history)) print '\n' t3 = time.clock() for i,item in enumerate(output_history): output_state = np.copy(output_history[i]) if cccc.winner(output_state) or cccc.is_full(output_state): target_history[i] = cccc.winner(output_state) else: #minus because the future term is in terms of the valuation for the player, and we need a target for the #opponent # targets[i] = (1-future_discount) * reward_state + future_discount * get_max_future( #output_state,value_fun) #targets = np.array(targets).reshape(BATCH_SIZE,1) #temporal difference method target_history[i]= 0 current_state = np.copy(output_state) depth = 0 player = 1