Example #1
def human_make_move(board):
    #prompts the human for a column, makes the move, and returns the new state
    print("board before human move:")
    print(board)
    return cf.make_move(board,
                        int(input("Enter cols 1 to 7 as your next move: ")) - 1,
                        False)
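A minimal sketch of how this helper could be wired into a play loop against the trained network from the training example below; dnn_player_make_move is a hypothetical helper, and model, reshape_board, and the cf API are assumed to exist as in Example #2:

import numpy as np

def dnn_player_make_move(board):
    #hypothetical helper: play the column with the highest predicted Q-value
    qval = model.predict(reshape_board(board), batch_size=1)
    return cf.make_move(board, int(np.argmax(qval)), True)

def play_human_vs_dnn():
    #alternate moves until cf reports the game is no longer 'ongoing'
    state = cf.init_game()  #state: (board, status)
    while state[1] == 'ongoing':
        state = dnn_player_make_move(state[0])
        if state[1] == 'ongoing':
            state = human_make_move(state[0])
    print(state)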
Example #2
def train_dnn_player():

    #start reinforcement learning
    epochs = 10000  #number of training games
    gamma = 0.9  #discount factor
    epsilon = 1  #exploration rate for epsilon-greedy action selection
    buffer = 80  #experience replay buffer size
    replay = []  #stored (state, action, update, new_state) transitions
    batchSize = 40  #minibatch size sampled from the replay buffer
    h = 0

    for i in range(epochs):
        #start a new game
        state = cf.init_game()  #state: (board, status)
        print('Game #: {}'.format(i))

        while state[1] == 'ongoing':
            #dnn_player makes its move based on Q-learning (epsilon-greedy)
            print("board before dnn player:")
            print(state[0])

            qval = model.predict(reshape_board(state[0]), batch_size=1)
            if np.random.random() < epsilon:
                action = np.random.randint(0, 7)
            else:
                action = np.argmax(qval)

            new_state = cf.make_move(state[0], action,
                                     True)  #(board, col, first_player?)

            #get the reward for the new state
            reward = cf.get_reward(new_state[1])

            #let minimax play
            if new_state[1] == 'ongoing':
                new_state = cf.make_move(new_state[0],
                                         mini_ai.minimax(new_state[0], False),
                                         False)
                newQ = model.predict(reshape_board(new_state[0]), batch_size=1)
                maxQ = np.max(newQ)
                update = (reward + (gamma * maxQ))
            else:
                update = reward

            #experience replay: store each transition so the network can be trained on random samples of past moves
            if (len(replay) < buffer):
                replay.append((state, action, update, new_state))
            else:  #buffer is full, overwrite old values
                if (h < (buffer - 1)):
                    h += 1
                else:
                    h = 0
                replay[h] = (state, action, update, new_state)

                #randomly sample our experience replay memory
                minibatch = random.sample(replay, batchSize)
                X_train = []
                y_train = []

                for memory in minibatch:
                    #each memory already stores the Q-learning update computed during play
                    old_state, action, update, new_state = memory
                    old_qval = model.predict(reshape_board(old_state[0]),
                                             batch_size=1)

                    #Q-value targets: copy the network's current outputs and
                    #overwrite the chosen action with the stored update
                    y = np.zeros((1, 7))
                    y[:] = old_qval[:]
                    y[0][action] = update

                    X_train.append(reshape_board(old_state[0]).reshape(126))
                    y_train.append(y.reshape(7))

                X_train = np.array(X_train)
                y_train = np.array(y_train)
                model.fit(X_train, y_train, batch_size=batchSize, verbose=1)

            state = new_state

        if epsilon > 0.1:
            epsilon -= (1 / epochs)

        print(state)
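The training loop above assumes a model and a reshape_board helper defined elsewhere in the project. Below is a minimal sketch consistent with the shapes used above (a 126-element input vector, e.g. 6 x 7 cells x 3 one-hot channels, and 7 output Q-values, one per column); the Keras architecture, layer sizes, and one-hot encoding are assumptions, not the original implementation:

import numpy as np
from keras.models import Sequential
from keras.layers import Dense

def reshape_board(board):
    #assumed encoding: 6x7 board with cell values in {0, 1, 2}, one-hot encoded
    #into 3 channels and flattened to a (1, 126) input vector
    onehot = np.zeros((6, 7, 3))
    for r in range(6):
        for c in range(7):
            onehot[r, c, int(board[r][c])] = 1
    return onehot.reshape(1, 126)

model = Sequential()
model.add(Dense(164, activation='relu', input_shape=(126,)))  #hidden sizes are placeholders
model.add(Dense(150, activation='relu'))
model.add(Dense(7, activation='linear'))  #one Q-value per column
model.compile(loss='mse', optimizer='rmsprop')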
Example #3
def constant_AI_make_move(board):
    #always plays the same column and returns the new state
    print("board before constant_AI:")
    print(board)
    return cf.make_move(board, 1, False)