import random
import time
from math import inf

import numpy as np

import cccc  # Connect Four utilities: winner, is_full, available_moves, game_over, ...


def game_over(board):
    """Return a truthy value once the game has ended: a winner or a full board."""
    board = np.array(board).reshape((6, 7))
    return cccc.winner(board) or cccc.is_full(board)
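# The search below calls update_move / unupdate_move, which are not defined in
# this section. The sketch here is an assumption inferred from the commented-out
# lines inside alpha_beta_move (row 0 is the top of the board, so a piece drops
# to the last zero row in its column); it is not the original definition.
def update_move(board, column, turn):
    # Drop a piece for `turn` into the lowest empty cell of `column`.
    row = np.where(board[:, column] == 0)[0][-1]
    board[row, column] = turn


def unupdate_move(board, column):
    # Undo the most recent drop in `column` by clearing its topmost piece.
    row = np.where(board[:, column] != 0)[0][0]
    board[row, column] = 0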
def alpha_beta_move(board, turn, depth=0, alpha=(-inf, -inf), beta=(inf, inf),
                    evaluation=lambda x: 0):
    """Negamax search with alpha-beta pruning.

    Values are (score, depth) tuples: depth counts down toward 0, so between
    equal scores the search prefers wins found closer to the root.
    """
    dummy_board = np.copy(board)  # we don't want to change the board state
    swap_player = {1: -1, -1: 1}  # so we can change whose turn it is
    options = cccc.available_moves(board)  # get legal moves
    random.shuffle(options)  # should inherit move order instead of randomizing

    # if len(options) == 1:
    #     update_move(board, options[0])
    #     if cccc.winner(dummy_board):
    #         return (inf, options[0])
    #     else:
    #         return (0, options[0])

    best_value = (-inf, -inf)
    if not options:
        print(board, cccc.game_over(board))
        print('oops, no available moves')
    cand_move = options[0]

    if depth == 0:
        # Leaf: score each move with the static evaluation, taken from the
        # opponent's perspective and negated.
        for x in options:
            update_move(dummy_board, x, turn)
            op_value = (evaluation(dummy_board * swap_player[turn]), depth)
            if tuple(-1 * el for el in op_value) > best_value:
                cand_move = x
                best_value = tuple(-1 * el for el in op_value)
            alpha = max(alpha, best_value)
            # print(depth, op_value, best_value, cand_move, alpha, beta)
            if alpha >= beta:
                # print('pruned')
                break  # alpha-beta cutoff
            unupdate_move(dummy_board, x)
    else:
        for x in options:
            # dummy_board = np.copy(board)
            # height = np.where(board[:, x] == 0)[0][-1]  # Connect Four only
            # dummy_board[height, x] = turn
            update_move(dummy_board, x, turn)
            if cccc.winner(dummy_board):  # should check over and tied too
                return ((inf, depth), x)
            if cccc.is_full(dummy_board):  # assumes you can't lose on your own turn
                return ((0, depth), x)
            op_value, _ = alpha_beta_move(dummy_board, swap_player[turn], depth - 1,
                                          alpha=tuple(-1 * el for el in beta),
                                          beta=tuple(-1 * el for el in alpha),
                                          evaluation=evaluation)
            if tuple(-1 * el for el in op_value) > best_value:
                cand_move = x
                best_value = tuple(-1 * el for el in op_value)
            alpha = max(alpha, best_value)
            # print(depth, op_value, best_value, cand_move, alpha, beta)
            if alpha >= beta:
                # print('pruned')
                break  # alpha-beta cutoff
            unupdate_move(dummy_board, x)
    return (best_value, cand_move)
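# Hypothetical usage sketch (an illustration, not part of the original training
# harness; it assumes cccc.available_moves works on a plain 6x7 array): pick a
# move for player 1 from an empty board with a 4-ply search and a flat
# evaluation. Because values are (score, depth) tuples and depth counts down,
# a win found nearer the root keeps a larger depth component and is preferred.
def _demo_alpha_beta():
    empty_board = np.zeros((6, 7), dtype=int)
    value, column = alpha_beta_move(empty_board, 1, depth=4,
                                    evaluation=lambda b: 0)
    print('best column:', column, 'value:', value)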
game_length = len(result_O.log)
input_list = [-1 * board_list[2 * i + 1] for i in range(game_length // 2)]
output_list = [-1 * board_list[2 * i + 2] for i in range(game_length // 2)]
move_list = [result_O.log[2 * i + 1] for i in range(game_length // 2)]

for epoch in range(train_duration):
    t1 = time.perf_counter()
    if len(input_history) > minimum_data:
        target_history = np.zeros(len(output_history))
        print('Creating Targets for {} data points'.format(len(output_history)))
        print('\n')
        t3 = time.perf_counter()
        for i, item in enumerate(output_history):
            output_state = np.copy(output_history[i])
            if cccc.winner(output_state) or cccc.is_full(output_state):
                # Terminal position: the target is the actual game outcome.
                target_history[i] = cccc.winner(output_state)
            else:
                # Minus because the future term is in terms of the valuation
                # for the player, and we need a target for the opponent.
                # targets[i] = (1 - future_discount) * reward_state + \
                #     future_discount * get_max_future(output_state, value_fun)
                # targets = np.array(targets).reshape(BATCH_SIZE, 1)
                # Temporal-difference method:
                target_history[i] = 0
                current_state = np.copy(output_state)
                depth = 0
                player = 1