def game_over(board):
    # Normalize the board to a 6x7 numpy array, then check for a winner or a full (drawn) board.
    board = np.array(board).reshape((6, 7))
    return cccc.winner(board) or cccc.is_full(board)
Example 3
def game_over(board):
    return cccc.winner(board) or cccc.is_full(board)
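
A minimal usage sketch for the game_over helpers above, assuming the project's cccc module is importable and that cccc.winner returns the winning player (or 0) while cccc.is_full returns a boolean; the board encoding (0 = empty, 1 / -1 = the two players) is taken from the alpha-beta example below.

import numpy as np

empty = np.zeros((6, 7), dtype=int)   # 0 = empty cell, 1 / -1 = the two players
print(game_over(empty))               # expected: falsy (no winner, board not full)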
Example 4
# Imports assumed by this snippet; cccc, update_move and unupdate_move are
# helpers from the surrounding Connect Four module (inf could equally come from numpy).
import random
import numpy as np
from math import inf


def alpha_beta_move(board, turn, depth=0, alpha=(-inf, -inf), beta=(inf, inf), evaluation=lambda x: 0):
    dummy_board = np.copy(board)  # work on a copy so we don't mutate the caller's board state

    swap_player = {1: -1, -1: 1}  # maps a player to their opponent
    options = cccc.available_moves(board)  # legal moves (columns)
    random.shuffle(options)  # should inherit move order instead of randomizing


#     if len(options) == 1:
#         update_move(board,options[0])
#         if cccc.winner(dummy_board):
#             return (inf,options[0])
#         else:
#             return (0,options[0])   
    
    best_value = (-inf, -inf)

    if not options:
        # Should not happen: the caller is expected to stop searching once the game is over.
        print(board, cccc.game_over(board))
        print('oops, no available moves')
    cand_move = options[0]
    if depth == 0:
        # Leaf of the search: score each move with the static evaluation function.
        for x in options:
            update_move(dummy_board, x, turn)
            op_value = (evaluation(dummy_board * swap_player[turn]), depth)

            if tuple(-1 * el for el in op_value) > best_value:
                cand_move = x
                best_value = tuple(-1 * el for el in op_value)
                alpha = max(alpha, best_value)
            # print(depth, -op_value, best_value, cand_move, alpha, beta)
            if alpha >= beta:
                # print('pruned')
                break  # alpha-beta cutoff
            unupdate_move(dummy_board, x)
    else:
        # Interior node: recurse with roles swapped and the alpha/beta window negated.
        for x in options:

            # dummy_board = np.copy(board)
            # height = np.where(board[:, x] == 0)[0][-1]  # connect four only
            # dummy_board[height, x] = turn
            update_move(dummy_board, x, turn)

            if cccc.winner(dummy_board):  # should also check for game over / ties
                return ((inf, depth), x)

            if cccc.is_full(dummy_board):  # this assumes you can't lose on your own turn
                return ((0, depth), x)
            
            op_value, _ = alpha_beta_move(dummy_board,
                                          swap_player[turn],
                                          depth - 1,
                                          alpha=tuple(-1 * el for el in beta),
                                          beta=tuple(-1 * el for el in alpha),
                                          evaluation=evaluation)

            if tuple(-1 * el for el in op_value) > best_value:
                cand_move = x
                best_value = tuple(-1 * el for el in op_value)
                alpha = max(alpha, best_value)
            # print(depth, -op_value, best_value, cand_move, alpha, beta)
            if alpha >= beta:
                # print('pruned')
                break  # alpha-beta cutoff
            unupdate_move(dummy_board, x)
            # dummy_board[height, x] = 0
    return (best_value, cand_move)
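
alpha_beta_move relies on update_move and unupdate_move, which this snippet does not show. The sketch below is a hypothetical reconstruction based only on the commented-out lines above (a piece drops to the lowest empty row of a column; undoing clears the topmost occupied row of that column); the project's actual helpers may differ.

import numpy as np

def update_move(board, column, turn):
    # Drop a piece for `turn` (1 or -1) into the lowest empty row of `column`.
    height = np.where(board[:, column] == 0)[0][-1]
    board[height, column] = turn

def unupdate_move(board, column):
    # Remove the most recently dropped piece: the topmost occupied row of `column`.
    height = np.where(board[:, column] != 0)[0][0]
    board[height, column] = 0

With helpers like these in place, a call such as alpha_beta_move(board, 1, depth=4, evaluation=my_heuristic) (my_heuristic being whatever static evaluator you supply) returns the (score, depth) pair of the best line together with the column to play.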
Example 5
    game_length = len(result_O.log)
    input_list = [-1 * board_list[2 * i + 1] for i in range(game_length // 2)]
    output_list = [-1 * board_list[2 * i + 2] for i in range(game_length // 2)]
    move_list = [result_O.log[2 * i + 1] for i in range(game_length // 2)]
for epoch in range(train_duration):

    t1 = time.perf_counter()  # time.clock() was removed in Python 3.8

    if len(input_history) > minimum_data:
        target_history = np.zeros(len(output_history))
        print('Creating Targets for {} data points'.format(len(output_history)))
        print('\n')
        t3 = time.perf_counter()
        for i, item in enumerate(output_history):
            output_state = np.copy(item)
            if cccc.winner(output_state) or cccc.is_full(output_state):
                # Terminal position: the target is just the game result.
                target_history[i] = cccc.winner(output_state)
            else:
                # Minus sign because the future term is the valuation for the player,
                # while we need a target for the opponent.
                # targets[i] = (1 - future_discount) * reward_state + future_discount * get_max_future(
                #     output_state, value_fun)
                # targets = np.array(targets).reshape(BATCH_SIZE, 1)

                # temporal difference method
                target_history[i] = 0
                current_state = np.copy(output_state)

                depth = 0
                player = 1