Example #1
def trainNNs(X, T, trainFraction, hiddenLayerStructures, numberRepetitions, numberIterations, classify=False):
    import neuralnetworks as nn
    import mlutils as ml
    import numpy as np
    import time
    result = []
    for structure in hiddenLayerStructures:
        trainedResult = []
        testResult = []
        t0 = time.time()
        for n in range(numberRepetitions):
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(X, T, (trainFraction, 1 - trainFraction),
                                                        classification=classify)
            if classify:
                nnet = nn.NeuralNetworkClassifier(X.shape[1], structure, len(np.unique(T)))
                nnet.train(Xtrain, Ttrain, numberIterations, errorPrecision=1.e-8)
                trainedResult.append(np.sum(nnet.use(Xtrain)==Ttrain)/len(Ttrain))
                testResult.append(np.sum(nnet.use(Xtest)==Ttest)/len(Ttest))
            else:
                nnet = nn.NeuralNetwork(X.shape[1], structure, T.shape[1])
                nnet.train(Xtrain, Ttrain, numberIterations)
                trainedResult.append(np.sqrt(np.mean(((nnet.use(Xtrain)-Ttrain)**2))))
                testResult.append(np.sqrt(np.mean(((nnet.use(Xtest)-Ttest)**2))))

            
        result.append([structure, trainedResult, testResult, time.time() - t0])
    return result
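
# A minimal usage sketch; the data and parameter values are illustrative and
# assume neuralnetworks.py and mlutils.py are importable:
import numpy as np
X = np.arange(10).reshape((-1, 1))
T = X + 1 + np.random.uniform(-1, 1, (10, 1))
result = trainNNs(X, T, 0.8, [0, 10, [10, 10]], 5, 100, classify=False)
for structure, trainErrors, testErrors, seconds in result:
    print(structure, np.mean(trainErrors), np.mean(testErrors), seconds)
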
def testfn():
    import numpy as np
    import matplotlib.pyplot as plt
    import neuralnetworks as nn

    nnet = nn.NeuralNetwork(1, [4, 5], 1)
    X = np.linspace(0, 10, 10).reshape((-1, 1))
    T = 1.5 + 0.6 * X + 0.8 * np.sin(1.5 * X)
    T[np.logical_and(X > 2, X < 3)] *= 3
    T[np.logical_and(X > 5, X < 7)] *= 3

    nnet.train(X, T, nIterations=1000)

    Y = nnet.use(X)
    fig = plt.figure()

    # Top panel: targets in green, network output in red.
    ax = fig.add_subplot(211)
    ax.plot(range(1, 11), T, color='g', linewidth=1)
    ax.plot(range(1, 11), Y, color='r', linewidth=1)
    ax.set(ylim=[0, 10], xlim=[0, 11])

    # Bottom panel: the same two curves on a second set of axes.
    ax2 = fig.add_subplot(212)
    ax2.plot(range(1, 11), T, color='g', linewidth=1)
    ax2.plot(range(1, 11), Y, color='r', linewidth=1)
    ax2.set(ylim=[0, 10], xlim=[0, 11])

    plt.show()
Example #3
def trainNNs(X, T, trainFraction, hiddenLayerStructures, numberRepetitions, numberIterations, classify=False):
    """
    Trains neural networks repeatedly.
    :param X: Data to partition and train
    :param T: Target values
    :param trainFraction: Fraction of the data to use for training
    :param hiddenLayerStructures: List of hidden layer structures to try
    :param numberRepetitions: Number of times to run train
    :param numberIterations: Iterations within Neural Network
    :param classify: Classification or Regression
    :return: List containing the hidden layer structure, the training error and testing error, and the elapsed time.
    """
    import numpy as np
    import neuralnetworks as nn
    import time
    import mlutils as ml

    results = []

    for structure in hiddenLayerStructures:
        trainList = []
        testList = []
        t0 = time.time()
        for i in range(numberRepetitions):
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(X, T, (trainFraction, 1 - trainFraction), classification=classify)
            if classify:
                nnet = nn.NeuralNetworkClassifier(X.shape[1], structure, len(np.unique(T)))
                nnet.train(Xtrain, Ttrain, numberIterations)
                # Record the fraction classified correctly on each partition,
                # following the pattern of Example #1.
                trainList.append(np.sum(nnet.use(Xtrain) == Ttrain) / len(Ttrain))
                testList.append(np.sum(nnet.use(Xtest) == Ttest) / len(Ttest))
            else:
                nnet = nn.NeuralNetwork(X.shape[1], structure, T.shape[1])
                nnet.train(Xtrain, Ttrain, numberIterations)
                # Record the RMSE on each partition.
                trainList.append(np.sqrt(np.mean((nnet.use(Xtrain) - Ttrain)**2)))
                testList.append(np.sqrt(np.mean((nnet.use(Xtest) - Ttest)**2)))
        results.append([structure, trainList, testList, time.time() - t0])
    return results
def performance(X, T, trainFraction, hidden, numberRepetitions,
                numberIterations):
    # Make the lists for train and test data performance
    trainP = []
    testP = []

    # For numberRepetitions
    for rep in range(numberRepetitions):
        # Use ml.partition to randomly partition X and T into training and testing sets.
        Xtrain, Ttrain, Xtest, Ttest = ml.partition(
            X, T, (trainFraction, 1 - trainFraction), classification=False)

        # Create a neural network of the given structure
        nnet = nn.NeuralNetwork(X.shape[1], hidden, T.shape[1])

        # Train it for numberIterations
        # nnet.train(X, T, numberIterations)
        nnet.train(Xtrain, Ttrain, numberIterations)

        # Use the trained network to produce outputs for the training and for the testing sets
        Ytrain = nnet.use(Xtrain)
        Ytest = nnet.use(Xtest)

        # Calculate the RMSE of training and testing sets.
        trainRMSE = np.sqrt(np.mean((Ytrain - Ttrain)**2))
        testRMSE = np.sqrt(np.mean((Ytest - Ttest)**2))

        # Add the training and testing performance to a collection (such as a list) for this network structure
        trainP.append(trainRMSE)
        testP.append(testRMSE)

    # Return trainP and testP
    return trainP, testP
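
# A minimal usage sketch for performance(); it assumes numpy as np, mlutils as
# ml, and neuralnetworks as nn are already imported, and the values are illustrative:
X = np.arange(10).reshape((-1, 1))
T = X + 1 + np.random.uniform(-1, 1, (10, 1))
trainP, testP = performance(X, T, 0.8, [10, 10], 5, 100)
print(np.mean(trainP), np.mean(testP))
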
Example #5
def trainQnet(nReps, hiddenLayers, nIterations, nReplays, epsilon, epsilonDecayFactor, game):
    outcomes = np.zeros(nReps)
    n = game.inputSize()
    Qnet = nn.NeuralNetwork(n, hiddenLayers, 1)
    # Use identity functions so the Q targets are not standardized.
    Qnet._standardizeT = lambda x: x
    Qnet._unstandardizeT = lambda x: x

    samples = []  # collect all samples for this repetition, then update the Q network at end of repetition.
    for rep in range(nReps):
        if rep > 0:
            epsilon *= epsilonDecayFactor
        step = 0
        done = False
        samples = []
        samplesNextStateForReplay = []
        
        move, _ = epsilonGreedy(Qnet, epsilon, game)

        while not done:
            step += 1

            # Make this move to update game.state
            game.makeMove(move)
            r = -1
            
            # Choose move from updated game.state
            moveNext, Qnext = epsilonGreedy(Qnet, epsilon, game)

            if game.gameOver():
                # goal found
                Qnext = 0
                done = True
                outcomes[rep] = step
                
                if rep % 10 == 0 or rep == nReps - 1:
                    print('rep = {:d}, epsilon = {:.3f}, steps = {:d}'.format(rep, epsilon, int(outcomes[rep])), end=';\n')
                   
            samples.append([*game.newStateRep(), *move, r, Qnext])
            samplesNextStateForReplay.append([*game.newStateRep(), *moveNext])
            move = deepcopy(moveNext)
            
        samples = np.array(samples)
        X = samples[:,:n]
        T = samples[:,n:n+1] + samples[:,n+1:n+2]
        Qnet.train(X, T, nIterations, verbose=False)

        # Experience Replay: Train on recent samples with updates to Qnext.
        samplesNextStateForReplay = np.array(samplesNextStateForReplay)
        for replay in range(nReplays):
            QnextNotZero = samples[:, n + 1] != 0
            samples[QnextNotZero, n + 1:n + 2] = Qnet.use(samplesNextStateForReplay[QnextNotZero, :])
            # Target is the reinforcement plus the updated Qnext, matching the
            # first training pass above.
            T = samples[:, n:n + 1] + samples[:, n + 1:n + 2]
            Qnet.train(X, T, nIterations, verbose=False)

    print('TRAINING COMPLETE')
    return Qnet, outcomes, samples
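
# The epsilonGreedy helper used above is not shown here. Below is a minimal
# sketch under stated assumptions: game.validMoves() is a hypothetical accessor
# for the legal moves, and the greedy choice maximizes Q because each step
# earns r = -1, so Q estimates the negative of the remaining steps.
def epsilonGreedy(Qnet, epsilon, game):
    moves = game.validMoves()  # hypothetical; must match your game class
    if np.random.uniform() < epsilon:
        # Explore: pick a random legal move.
        move = moves[np.random.choice(len(moves))]
    else:
        # Exploit: pick the legal move with the highest predicted Q.
        Qs = [float(Qnet.use(np.array([[*game.newStateRep(), *m]])))
              for m in moves]
        move = moves[int(np.argmax(Qs))]
    Q = float(Qnet.use(np.array([[*game.newStateRep(), *move]])))
    return move, Q
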
Example #6
def trainNNs(X,
             T,
             trainFraction,
             hiddenLayerStructures,
             numberRepetitions,
             numberIterations,
             classify=False):
    results = []
    for structure in hiddenLayerStructures:
        print(structure, end=" ")
        #time each hidden layer structure
        start_time = time.time()
        structureData = [structure]
        trainDataResults = []
        testDataResults = []
        for i in range(0, numberRepetitions):
            #partition data
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(
                X,
                T, (trainFraction, 1 - trainFraction),
                classification=classify)
            if not classify:
                #create/train network
                nnet = nn.NeuralNetwork(Xtrain.shape[1], structure,
                                        Ttrain.shape[1])
                nnet.train(Xtrain, Ttrain, nIterations=numberIterations)
                # test network
                Ytrain = nnet.use(Xtrain)
                Ytest = nnet.use(Xtest)
                # add error for testing and training data
                trainDataResults.append(np.sqrt(np.mean((Ytrain - Ttrain)**2)))
                testDataResults.append(np.sqrt(np.mean((Ytest - Ttest)**2)))
            else:
                #create/train network
                nnet = nn.NeuralNetworkClassifier(Xtrain.shape[1], structure,
                                                  np.unique(Ttrain).size)
                nnet.train(Xtrain, Ttrain, nIterations=numberIterations)
                # test network
                Ptrain = nnet.use(Xtrain)
                Ptest = nnet.use(Xtest)
                # add error for testing and training data
                trainDataResults.append(1 - (np.sum(Ptrain == Ttrain) /
                                             len(Ttrain)))
                testDataResults.append(1 -
                                       (np.sum(Ptest == Ttest) / len(Ttest)))
        structureData.append(trainDataResults)
        structureData.append(testDataResults)
        structureData.append(time.time() - start_time)
        results.append(structureData)
        print("done")
    return results
Example #7
def trainNNs(X,
             T,
             trainFraction,
             hiddenLayerStructures,
             numberRepetitions,
             numberIterations,
             classify=False):
    # Master result list - we shall keep appending to this.
    result = []
    # Iterate through each network structure provided.
    for net in hiddenLayerStructures:
        # To store performances of each training run for a network structure.
        trainPerformance = []
        testPerformance = []
        # To measure time elapsed.
        start_time = time.time()
        # Iterate for number of repetitions to train neural network.
        for i in range(numberRepetitions):
            # Partition X and T into training and testing data.
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(
                X, T, (trainFraction, 1 - trainFraction), classification=classify)
            if classify:
                # Create a classifier for this structure and record the
                # fraction of samples classified incorrectly.
                nnet = nn.NeuralNetworkClassifier(Xtrain.shape[1], net,
                                                  len(np.unique(T)))
                nnet.train(Xtrain, Ttrain, nIterations=numberIterations)
                trainResult = 1 - np.sum(nnet.use(Xtrain) == Ttrain) / len(Ttrain)
                testResult = 1 - np.sum(nnet.use(Xtest) == Ttest) / len(Ttest)
            else:
                # Create a regression network for this structure and record
                # the RMSE against the targets (not the inputs).
                nnet = nn.NeuralNetwork(Xtrain.shape[1], net, Ttrain.shape[1])
                nnet.train(Xtrain, Ttrain, nIterations=numberIterations)
                trainResult = np.sqrt(np.mean((nnet.use(Xtrain) - Ttrain)**2))
                testResult = np.sqrt(np.mean((nnet.use(Xtest) - Ttest)**2))

            # Append train and test performances to the lists.
            trainPerformance.append(trainResult)
            testPerformance.append(testResult)
        end_time = time.time()
        elapsed = end_time - start_time
        # Now, we append everything to the master 'result' list.
        result.append([net, trainPerformance, testPerformance, elapsed])

    return result
def trainNNs(X, T, trainFraction, hiddenLayerStructures, numberRepetitions,
             numberIterations, classify):
    results = []

    # Do tasks here
    for h_layer in hiddenLayerStructures:
        start = time.time()
        train_rmse = []
        test_rmse = []
        for repetition in range(numberRepetitions):
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(
                X,
                T, (trainFraction, 1 - trainFraction),
                classification=classify)
            if classify:
                nnet = nn.NeuralNetworkClassifier(X.shape[1], h_layer,
                                                  len(np.unique(T)))
                nnet.train(Xtrain, Ttrain, numberIterations)
                predTrain, _, _ = nnet.use(Xtrain, allOutputs=True)
                predTest, probsTest, _ = nnet.use(
                    Xtest, allOutputs=True)  # discard hidden unit outputs
                # Record percent correct so the classification runs also
                # appear in the results.
                train_rmse.append(ml.percentCorrect(predTrain, Ttrain))
                test_rmse.append(ml.percentCorrect(predTest, Ttest))

            else:
                nnet = nn.NeuralNetwork(X.shape[1], h_layer, T.shape[1])
                nnet.train(Xtrain, Ttrain, numberIterations)
                Ytrain = nnet.use(Xtrain)
                Ytest = nnet.use(Xtest)
                trn_rmse = np.sqrt(np.mean((Ytrain - Ttrain)**2))
                tst_rmse = np.sqrt(np.mean((Ytest - Ttest)**2))
                train_rmse.append(trn_rmse)
                test_rmse.append(tst_rmse)

        total_time = time.time() - start
        results.append([h_layer, train_rmse, test_rmse, total_time])

    # End tasks

    # print(results)
    return results
Example #9
def train(nReps, hiddenLayers, epsilon, epsilonDecayFactor, nTrainIterations,
          nReplays):
    # The inputs to the neural network are:
    #   width * height 0/1 values for the board
    #   7 inputs for which piece we're placing
    #   A column to place the piece in - 10 values
    #   A piece rotation - 4 values
    # The output from the neural network is:
    #   A single number to represent the estimated number of moves to game over.
    boardWidth = 10
    boardHeight = 20
    numDataCols = boardWidth * boardHeight + 7 + 10 + 4
    Qnet = nn.NeuralNetwork(numDataCols, hiddenLayers, 1)
    Qnet._standardizeT = lambda x: x
    Qnet._unstandardizeT = lambda x: x

    outcomes = np.zeros(nReps)
    for rep in range(nReps):
        if rep > 0:
            epsilon *= epsilonDecayFactor

        # Play a game, collecting samples
        samples = []
        samplesNextStateForReplay = []
        board = tetris.Board(boardWidth, boardHeight)
        move, _ = epsilonGreedy(Qnet, board, epsilon)
        done = False
        step = 0
        while not done:
            step += 1

            if step > 100:
                return Qnet, outcomes

            # print(board)

            newBoard = deepcopy(board)
            newBoard.make_move(move)

            if newBoard.game_over:
                done = True
                Qnext = 0
                # No next move exists after game over; reuse the last move as
                # a placeholder so the replay record below stays well formed.
                moveNext = move
                outcomes[rep] = step
                print("Played game", rep, ", lasted for", step,
                      "moves, epsilon is", epsilon)
            else:
                moveNext, Qnext = epsilonGreedy(Qnet, newBoard, epsilon)

            r = -1
            stateRepresentation = board.getStateRepresentation()
            moveRepresentation = board.getMoveRepresentation(move)
            # fullRep = (*stateRepresentation, *moveRepresentation)
            # It's possible to see the same board state twice.
            # If that happens, we should only keep the one furthest from game over,
            # since that represents the true game length from that state.
            # if fullRep in samples:
            #     (_, existingQnext) = samples[fullRep]
            #     Qnext = min(Qnext, existingQnext)
            # samples[fullRep] = (r, Qnext)
            samples.append(
                [*stateRepresentation, *moveRepresentation, r, Qnext])
            samplesNextStateForReplay.append([
                *newBoard.getStateRepresentation(),
                *newBoard.getMoveRepresentation(moveNext)
            ])

            move = moveNext
            board = newBoard

        # Convert samples to an array.
        # samples_ary = []
        # for key, value in samples.items():
        #     samples_ary.append([*key, *value])
        # samples = np.array(samples_ary)
        samples = np.array(samples)
        #print(samples[:, numDataCols+1])
        #print(samples)
        X = samples[:, :numDataCols]
        T = samples[:, numDataCols:numDataCols +
                    1] + samples[:, numDataCols + 1:numDataCols + 2]

        # We know how many moves were remaining at each state of the game, since we can count from the end
        # of the game.  So let's use that data to train.
        # T = np.array(range(len(samples)-1, -1, -1))

        Qnet.train(X, T, nTrainIterations, verbose=False)
        # print(Qnet.W[:,0])

        #print(Qnet.getErrorTrace())

        # Experience replay
        samplesNextStateForReplay = np.array(samplesNextStateForReplay)
        for replay in range(nReplays):
            QnextNotZero = samples[:, numDataCols + 1] != 0
            samples[QnextNotZero, numDataCols + 1:numDataCols + 2] = Qnet.use(
                samplesNextStateForReplay[QnextNotZero, :])
            T = samples[:, numDataCols:numDataCols +
                        1] + samples[:, numDataCols + 1:numDataCols + 2]
            Qnet.train(X, T, nTrainIterations, verbose=False)

    return Qnet, outcomes
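
# A minimal usage sketch with illustrative hyperparameters; it assumes the
# tetris module and an epsilonGreedy helper matching the calls above:
Qnet, outcomes = train(nReps=100, hiddenLayers=[50, 50], epsilon=1.0,
                       epsilonDecayFactor=0.99, nTrainIterations=50,
                       nReplays=2)
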
Example #10
# Start with the ```NeuralNetwork``` class defined in lecture notes 09. Put that class definition, as written, into *neuralnetworks.py* in your current directory. Also place *mlutilities.py* from lecture notes 09 in your current directory. If this is done correctly, the following code should run and produce results similar to those shown here.

# In[1]:

import numpy as np
import matplotlib.pyplot as plt
get_ipython().magic('matplotlib inline')

# In[2]:

import neuralnetworks as nn

X = np.arange(10).reshape((-1, 1))
T = np.sin(X)

nnet = nn.NeuralNetwork(1, [10], 1)
nnet.train(X, T, 100, verbose=True)
nnet

# In[3]:

plt.figure(figsize=(8, 12))
plt.subplot(3, 1, 1)
plt.plot(nnet.getErrors())

plt.subplot(3, 1, 2)
plt.plot(X, T, 'o-', label='Actual')
plt.plot(X, nnet.use(X), 'o-', label='Predicted')

plt.subplot(3, 1, 3)
nnet.draw()


def summary(results):
    output = []
    for val in results:
        output.append([val[0], np.mean(val[1]), np.mean(val[2]), val[-1]])
    return output


def bestNetwork(summaries):
    val = np.array(summaries, dtype=object)
    idx = np.argmin(val[:, 2])
    return summaries[idx]


X = np.arange(10).reshape((-1, 1))
T = X + 1 + np.random.uniform(-1, 1, ((10, 1)))
X.shape, T.shape
nnet = nn.NeuralNetwork(X.shape[1], 2, T.shape[1])
nnet.train(X, T, 5)
# print(nnet.getErrorTrace())
# result = trainNNs(X, T, 0.8, [0, 10, [10, 10]], 5, 100, classify=False)
# result = trainNNs(X, T, 0.8, [0, 1, 2, 10, [10, 10], [5, 5, 5, 5], [2]*5], 50, 400, classify=False)
# print(result[0])
# print(bestNetwork(summary(result)))
print(bestNetwork(([[[1, 1], 1.3, 2.3, 0.5], [[2, 2, 2], 4.3, 1.3, 0.6]])))
    if func not in dir() or not callable(globals()[func]):
        print('CRITICAL ERROR: Function named \'{}\' is not defined'.format(func))
        print('  Check the spelling and capitalization of the function name.')
        def run_parameters_act(*args, verbose=False):
            global not_implemented
            not_implemented = True
            return pd.DataFrame([{'Activation': 'nope','RMSE Test': 0}, {'Activation': 'nope', 'RMSE Test':0, 'Epochs': 0}])


print('''\nTesting:
import neuralnetworks as nn
nnet = nn.NeuralNetwork(4, [10], 1)
acts = nnet.activation(np.array([-0.5, 1.5]))''')
      
import neuralnetworks as nn
nnet = nn.NeuralNetwork(4, [10], 1)
try:
    acts = nnet.activation(np.array([-0.5, 1.5]))
    correct_acts = np.array([-0.46211716,  0.90514825])
    if np.sum(np.abs(acts - correct_acts)) < 0.1:
        g += 10
        print('\n--- 10/10 points. nnet.activation() is correct.')
    else:
        g += 0
        print('\n---  0/10 points. nnet.activation() is {} but correct value is {}.'.format(acts, correct_acts))
except Exception as ex:
    print('\n--- 0/10 points. nnet.activation() raised exception', ex)

print('''\nTesting:
dacts = nnet.activation_derivative({})'''.format(correct_acts))
Example #13
def trainQnet(nBatches, nRepsPerBatch, hiddenLayers, nIterations, nReplays,
              epsilon, epsilonDecayFactor):
    outcomes = np.zeros(
        nBatches * nRepsPerBatch
    )  # holds the outcome of each game: 1 for a red win, -1 for a black win
    # Create a 68 to one mapping. Checker state and move pair to one Q value
    # Uses hiddenLayers
    Qnet = nn.NeuralNetwork(68, hiddenLayers, 1)
    Qnet._standardizeT = lambda x: x
    Qnet._unstandardizeT = lambda x: x
    samples = []  # looks double-initialized, but this keeps samples defined for the final return
    #Counts the total number of reps
    repk = -1

    # Big batch, each of these creates something on which to train the Q network
    for batch in range(nBatches):
        # decay epsilon after the first batch, flooring it at 0.01
        if batch > 0:
            epsilon *= epsilonDecayFactor
            epsilon = max(0.01, epsilon)

        samples = []
        samplesNextStateForReplay = []

        # Simulate nRepsPerBatch games
        for rep in range(nRepsPerBatch):
            repk += 1
            step = 0
            done = False

            state = Board()  # create a new board to represent the state
            move, _ = epsilonGreedy(
                Qnet, state,
                epsilon)  # Different than Qdict reinforcement, move first
            # Red goes first!

            while not done:
                step += 1

                # Make this move to get to nextState. Find the board state for the next move.
                stateNext = deepcopy(state)
                stateNext.makeMove(move)

                # Step reinforcement is zero so the network learns only from
                # wins and losses; intermediate reinforcements could be given here.
                r = 0
                # Qnext starts as None; if the game-over checks below leave it
                # None, the temporal-difference value comes from epsilonGreedy.
                Qnext = None

                # Now check to see if the game is over. Red just played,
                # so we shouldn't need to check if red is the winner.
                if finished(stateNext):  # GG, red won
                    # goal found. Q is one for winners
                    # could try a larger reinforcement....
                    Qnext = 1
                    done = True
                    outcomes[repk] = 1
                    if rep % 10 == 0 or rep == nRepsPerBatch - 1:
                        print(
                            'Red won: batch={:d} rep={:d} epsilon={:.3f} steps={:d} outcome={:d}'
                            .format(batch, repk, epsilon, step,
                                    int(outcomes[repk])),
                            end=', ')
                else:
                    # blacks turn
                    # choose a random choice for black.
                    blackMoves = stateNext.validMoves()
                    moveBlack = blackMoves[np.random.choice(
                        range(len(blackMoves)))]
                    stateNext.makeMove(moveBlack)
                    if finished(stateNext):  # BG, red lost
                        Qnext = -1  # <-  negative reinforcement for loss
                        outcomes[repk] = -1
                        done = True
                        if rep % 10 == 0 or rep == nRepsPerBatch - 1:
                            print(
                                'Black won: batch={:d} rep={:d} epsilon={:.3f} steps={:d} outcome={:d}'
                                .format(batch, repk, epsilon, step,
                                        int(outcomes[repk])),
                                end=', ')

                # At this point we're back at red's turn; get Q from epsilonGreedy if it wasn't set above.
                if Qnext is None:
                    moveNext, Qnext = epsilonGreedy(Qnet, stateNext, epsilon)
                else:
                    if len(stateNext.validMoves()) > 0:
                        moveNext, _ = epsilonGreedy(Qnet, stateNext, epsilon)
                    else:
                        moveNext = (
                            (0, 0), [(0, 0)]
                        )  #placeholder, really there isn't a next move in this case because we lost.

                # Append the state-move vector, reinforcement, and Qnext to samples.
                samples.append([*state.stateMoveVectorForNN(move), r, Qnext])
                # Record the next state-move vector (68 values, matching the
                # network input) for the experience replay pass below.
                samplesNextStateForReplay.append(
                    [*stateNext.stateMoveVectorForNN(moveNext)])

                state = deepcopy(stateNext)
                move = deepcopy(moveNext)

            # Train on the samples collected so far in this batch.
            npsamples = np.array(samples)
            X = npsamples[:, :68]  # the state-move vector input to the network
            # Target is the reinforcement (0 for now) plus Qnext.
            T = npsamples[:, 68:69] + npsamples[:, 69:70]
            Qnet.train(X, T, nIterations, verbose=False)

            # Experience Replay: Train on recent samples with updates to Qnext.
            # Not 100% needed, could just use the top part.
            replayStates = np.array(samplesNextStateForReplay)
            for replay in range(nReplays):
                # r is in column 68 and Qnext in column 69 of npsamples
                # (columns 5 and 6 would be board cells here).
                QnextNotZero = npsamples[:, 69] != 0
                npsamples[QnextNotZero, 69:70] = Qnet.use(
                    replayStates[QnextNotZero, :])
                T = npsamples[:, 68:69] + npsamples[:, 69:70]
                Qnet.train(X, T, nIterations, verbose=False)

    print('DONE')
    Qnet.outcomes = outcomes
    Qnet.samples = samples
    return Qnet
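
# A minimal usage sketch with illustrative hyperparameters; it assumes the
# Board class, finished(), and epsilonGreedy() used above are defined:
Qnet = trainQnet(nBatches=20, nRepsPerBatch=50, hiddenLayers=[100],
                 nIterations=50, nReplays=1, epsilon=1.0,
                 epsilonDecayFactor=0.99)
print(np.mean(Qnet.outcomes == 1))  # fraction of games red won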