Example #1
def trainNNs(X, T, trainFraction, hiddenLayerStructures, numberRepetitions, numberIterations, classify=False):
    """
    Trains neural networks repeatedly.
    :param X: Data to partition and train
    :param T: Target values
    :param trainFraction: Fraction of the data to use for training (the rest is used for testing)
    :param hiddenLayerStructures: List of hidden layer structures to try
    :param numberRepetitions: Number of times to repeat training for each structure
    :param numberIterations: Number of training iterations for each network
    :param classify: True for classification, False for regression
    :return: List with one entry per hidden layer structure: [structure, training errors, testing errors, elapsed time in seconds].
    """
    import numpy as np
    import neuralnetworks as nn
    import time
    import mlutils as ml

    results = []
    global resultErrors
    resultErrors = []

    # debugging
    verbose = True

    for structure in hiddenLayerStructures:
        trainList = []
        testList = []
        t0 = time.time()
        for i in range(numberRepetitions):
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(X, T, (trainFraction, 1 - trainFraction), classification=classify)
            if classify:
                nnet = nn.NeuralNetworkClassifier(X.shape[1], structure, len(np.unique(T)))
                nnet.train(Xtrain, Ttrain, numberIterations)
                # Fraction of samples classified incorrectly on the training and testing sets
                trainList.append(np.sum(nnet.use(Xtrain) != Ttrain) / len(Ttrain))
                testList.append(np.sum(nnet.use(Xtest) != Ttest) / len(Ttest))
            else:
                nnet = nn.NeuralNetwork(X.shape[1], structure, T.shape[1])
                nnet.train(Xtrain, Ttrain, numberIterations)
                # Root-mean-square error on the training and testing sets
                trainList.append(np.sqrt(np.mean((nnet.use(Xtrain) - Ttrain)**2)))
                testList.append(np.sqrt(np.mean((nnet.use(Xtest) - Ttest)**2)))
        elapsed = time.time() - t0
        results.append([structure, trainList, testList, elapsed])
        if verbose:
            print(structure, 'done in', round(elapsed, 2), 'seconds')
    return results
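
A minimal usage sketch for this function, assuming the course's neuralnetworks and mlutils modules are importable; the toy data, the structure list, and the hyperparameter values below are illustrative, not part of the original example:

import numpy as np

# Toy regression problem: 100 samples, 3 inputs, 1 target column.
X = np.random.uniform(-1, 1, (100, 3))
T = np.sin(X[:, 0:1]) + 0.1 * np.random.normal(size=(100, 1))

# Try one hidden layer of 5 units, one of 20, and two hidden layers of 10 units each.
results = trainNNs(X, T, 0.8, [5, 20, [10, 10]], 5, 100, classify=False)

# Each entry is [structure, training errors, testing errors, elapsed seconds].
for structure, trainErrors, testErrors, seconds in results:
    print(structure, np.mean(trainErrors), np.mean(testErrors), round(seconds, 2))
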
Example #2
def trainNNs(X, T, trainFraction, hiddenLayerStructures, numberRepetitions, numberIterations, classify=False):
    import neuralnetworks as nn
    import mlutils as ml
    import numpy as np
    import time
    result = []
    for structure in hiddenLayerStructures:
        trainedResult = []
        testResult = []
        t0 = time.time()
        for n in range(0, numberRepetitions):
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(X, T, (trainFraction, 1 - trainFraction), classification=classify)
            if classify:
                nnet = nn.NeuralNetworkClassifier(X.shape[1], structure, len(np.unique(T)))
                nnet.train(Xtrain, Ttrain, numberIterations, errorPrecision=1.e-8)
                trainedResult.append(np.sum(nnet.use(Xtrain)==Ttrain)/len(Ttrain))
                testResult.append(np.sum(nnet.use(Xtest)==Ttest)/len(Ttest))
            else:
                nnet = nn.NeuralNetwork(X.shape[1], structure, T.shape[1])
                nnet.train(Xtrain, Ttrain, numberIterations)
                trainedResult.append(np.sqrt(np.mean(((nnet.use(Xtrain)-Ttrain)**2))))
                testResult.append(np.sqrt(np.mean(((nnet.use(Xtest)-Ttest)**2))))

            
        result.append([structure, trainedResult, testResult, time.time() - t0])
    return result
Example #3
def performanceC(X, T, trainFraction, hidden, numberRepetitions,
                 numberIterations):
    # Make the lists for train and test data performance
    trainP = []
    testP = []

    # For numberRepetitions
    for rep in range(numberRepetitions):
        # Use ml.partition to randomly partition X and T into training and testing sets.
        Xtrain, Ttrain, Xtest, Ttest = ml.partition(
            X, T, (trainFraction, 1 - trainFraction), classification=True)

        # Create a neural network of the given structure
        nnet = nn.NeuralNetworkClassifier(X.shape[1], hidden,
                                          len(np.unique(T)))

        # Train it for numberIterations
        # nnet.train(X, T, numberIterations)
        nnet.train(Xtrain, Ttrain, numberIterations)

        # Use the trained network to produce outputs for the training and for the testing sets
        Ytrain = nnet.use(Xtrain)
        Ytest = nnet.use(Xtest)

        # Calculate the fraction of samples incorrectly classified for training and testing sets
        trainFrac = np.sum(Ytrain != Ttrain) / Ttrain.shape[0]
        testFrac = np.sum(Ytest != Ttest) / Ttest.shape[0]

        # Add the training and testing performance to a collection (such as a list) for this network structure
        trainP.append(trainFrac)
        testP.append(testFrac)

    # Return trainP and testP
    return trainP, testP
Example #4
def performance(X, T, trainFraction, hidden, numberRepetitions,
                numberIterations):
    # Make the lists for train and test data performance
    trainP = []
    testP = []

    # For numberRepetitions
    for rep in range(numberRepetitions):
        # Use ml.partition to randomly partition X and T into training and testing sets.
        Xtrain, Ttrain, Xtest, Ttest = ml.partition(
            X, T, (trainFraction, 1 - trainFraction), classification=False)

        # Create a neural network of the given structure
        nnet = nn.NeuralNetwork(X.shape[1], hidden, T.shape[1])

        # Train it for numberIterations
        # nnet.train(X, T, numberIterations)
        nnet.train(Xtrain, Ttrain, numberIterations)

        # Use the trained network to produce outputs for the training and for the testing sets
        Ytrain = nnet.use(Xtrain)
        Ytest = nnet.use(Xtest)

        # Calculate the RMSE of training and testing sets.
        trainRMSE = np.sqrt(np.mean((Ytrain - Ttrain)**2))
        testRMSE = np.sqrt(np.mean((Ytest - Ttest)**2))

        # Add the training and testing performance to a collection (such as a list) for this network structure
        trainP.append(trainRMSE)
        testP.append(testRMSE)

    # Return trainP and testP
    return trainP, testP
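
A minimal usage sketch for these two helpers; it assumes the course's neuralnetworks and mlutils modules are importable as nn and ml (the functions refer to them at module level), and the toy data is illustrative:

import numpy as np
import neuralnetworks as nn   # course module, assumed importable
import mlutils as ml          # course module, assumed importable

# Toy regression data: 200 samples, 2 inputs, 1 target column.
X = np.random.uniform(-1, 1, (200, 2))
T = X[:, 0:1] * X[:, 1:2] + 0.05 * np.random.normal(size=(200, 1))

# RMSE per repetition for a network with two hidden layers of 10 units each.
trainP, testP = performance(X, T, 0.8, [10, 10], 5, 400)
print('mean train RMSE:', np.mean(trainP), ' mean test RMSE:', np.mean(testP))

performanceC follows the same pattern for classification, expecting integer class labels in T and reporting the fraction of samples misclassified.
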
Example #5
def trainNNs(X,
             T,
             trainFraction,
             hiddenLayerStructures,
             numberRepetitions,
             numberIterations,
             classify=False):
    results = []
    for structure in hiddenLayerStructures:
        print(structure, end=" ")
        #time each hidden layer structure
        start_time = time.time()
        structureData = [structure]
        trainDataResults = []
        testDataResults = []
        for i in range(0, numberRepetitions):
            #partition data
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(
                X,
                T, (trainFraction, 1 - trainFraction),
                classification=classify)
            if not classify:
                #create/train network
                nnet = nn.NeuralNetwork(Xtrain.shape[1], structure,
                                        Ttrain.shape[1])
                nnet.train(Xtrain, Ttrain, nIterations=numberIterations)
                #test network
                Ytrain = nnet.use(Xtrain)
                Ytest = nnet.use(Xtest)
                #add error for testing and training data
                trainDataResults.append(np.sqrt(np.mean((Ytrain - Ttrain)**2)))
                testDataResults.append(np.sqrt(np.mean((Ytest - Ttest)**2)))
            else:
                #create/train network
                nnet = nn.NeuralNetworkClassifier(Xtrain.shape[1], structure,
                                                  np.unique(Ttrain).size)
                nnet.train(Xtrain, Ttrain, nIterations=numberIterations)
                #test network
                Ptrain = nnet.use(Xtrain)
                Ptest = nnet.use(Xtest)
                #add error for testing and training data
                trainDataResults.append(1 - (np.sum(Ptrain == Ttrain) /
                                             len(Ttrain)))
                testDataResults.append(1 -
                                       (np.sum(Ptest == Ttest) / len(Ttest)))
        structureData.append(trainDataResults)
        structureData.append(testDataResults)
        structureData.append(time.time() - start_time)
        results.append(structureData)
        print("done")
    return results
Example #6
def trainNNs(X,
             T,
             trainFraction,
             hiddenLayerStructures,
             numberRepetitions,
             numberIterations,
             classify=False):
    import numpy as np
    import neuralnetworks as nn
    import mlutils as ml
    import time

    # Master result list - we shall keep appending to this.
    result = []
    # Iterate through each network structure provided.
    for net in hiddenLayerStructures:
        # To store performances of each training run for a network structure.
        trainPerformance = []
        testPerformance = []
        # To measure time elapsed.
        start_time = time.time()
        # Iterate for the number of repetitions to train the neural network.
        for i in range(numberRepetitions):
            # Partition X and T into training and testing data.
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(
                X, T, (trainFraction, 1 - trainFraction), classification=classify)

            # Create a neural network for this structure.
            if classify:
                nnet = nn.NeuralNetworkClassifier(X.shape[1], net, len(np.unique(T)))
            else:
                nnet = nn.NeuralNetwork(X.shape[1], net, T.shape[1])
            # Commence training.
            nnet.train(Xtrain, Ttrain, nIterations=numberIterations)
            # Use the trained network to produce outputs (for both training and testing input datasets).
            trainOut = nnet.use(Xtrain)
            testOut = nnet.use(Xtest)
            if classify:
                # Calculate the fraction of samples classified incorrectly (training and testing).
                trainPerf = np.sum(trainOut != Ttrain) / len(Ttrain)
                testPerf = np.sum(testOut != Ttest) / len(Ttest)
            else:
                # Calculate the RMSE of the training and testing sets.
                trainPerf = np.sqrt(np.mean((trainOut - Ttrain)**2))
                testPerf = np.sqrt(np.mean((testOut - Ttest)**2))

            # Append train and test performances to the lists.
            trainPerformance.append(trainPerf)
            testPerformance.append(testPerf)
        end_time = time.time()
        elapsed = end_time - start_time
        # Now, we append everything to the master 'result' list.
        result.append([net, trainPerformance, testPerformance, elapsed])

    return result
Example #7
def trainNNs(X, T, trainFraction, hiddenLayerStructures, numberRepetitions,
             numberIterations, classify):
    results = []

    # Do tasks here
    for h_layer in hiddenLayerStructures:
        start = time.time()
        train_rmse = []
        test_rmse = []
        for repetition in range(numberRepetitions):
            Xtrain, Ttrain, Xtest, Ttest = ml.partition(
                X,
                T, (trainFraction, 1 - trainFraction),
                classification=classify)
            if classify:
                nnet = nn.NeuralNetworkClassifier(X.shape[1], h_layer,
                                                  len(np.unique(T)))
                nnet.train(Xtrain, Ttrain, numberIterations)
                predTrain = nnet.use(Xtrain)
                predTest, probsTest, _ = nnet.use(
                    Xtest, allOutputs=True)  # discard hidden unit outputs
                # For classification, record percent correct rather than RMSE.
                train_rmse.append(ml.percentCorrect(predTrain, Ttrain))
                test_rmse.append(ml.percentCorrect(predTest, Ttest))

            else:
                nnet = nn.NeuralNetwork(X.shape[1], h_layer, T.shape[1])
                nnet.train(Xtrain, Ttrain, numberIterations)
                Ytrain = nnet.use(Xtrain)
                Ytest = nnet.use(Xtest)
                trn_rmse = np.sqrt(np.mean((Ytrain - Ttrain)**2))
                tst_rmse = np.sqrt(np.mean((Ytest - Ttest)**2))
                train_rmse.append(trn_rmse)
                test_rmse.append(tst_rmse)

            if repetition == (numberRepetitions - 1):
                total_time = time.time() - start
                results.append([h_layer, train_rmse, test_rmse, total_time])

    # End tasks

    # print(results)
    return results
Example #8
            

def summarize(results):
    import numpy as np
    summaryResults = []
    for result in results:
        summaryResults.append([result[0], np.mean(result[1]), np.mean(result[2]), result[3]])
    return summaryResults


def bestNetwork(summary):
    best = min(summary, key=lambda l: l[2])
    return best
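
A short sketch of how summarize and bestNetwork chain together to produce the bestNet structure used in the script below; the trainNNs call is an assumption here (it refers to one of the implementations shown earlier) and the structure list is illustrative:

# Assumed to come from one of the trainNNs implementations above:
# results = trainNNs(X, T, 0.8, [10, [20, 20], [50, 50]], 5, 200, classify=True)

summary = summarize(results)   # [[structure, mean train error, mean test error, seconds], ...]
best = bestNetwork(summary)    # the row with the lowest mean test error
bestNet = best[0]              # the hidden layer structure itself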

import pandas as pd
import numpy as np
import neuralnetworks as nn
import mlutils as ml

data = pd.read_csv("templates/data1Normed.csv")
names = list(data)
data["signcode"] = data["sign"].astype('category').cat.codes
data = data.values
Xhands = data[:, 0:63]
Xhands = Xhands.astype(np.float64)
Tsign = data[:, 64:65]
Tsign = Tsign.astype(np.int32)
# Run the best network structure on the hand-sign data
Xtrain, Ttrain, Xtest, Ttest = ml.partition(Xhands, Tsign, (0.8, 0.2), classification=True)
nnet = nn.NeuralNetworkClassifier(Xtrain.shape[1], bestNet, len(np.unique(Ttrain)))
nnet.train(Xtrain, Ttrain, 200)

result = nnet.use(Xtest)
Example #9
def run_train(rank, size, mode):

    client = InsecureClient('http://juneau:46731',
                            user='******')  # HDFS Web UI port!!
    with client.read("/pubg/aggregate/agg_match_stats_0.csv") as f:
        df = pd.read_csv(f, usecols=[1, 3, 4, 9, 12]).replace(to_replace={
            'tpp': 2,
            'fpp': 1
        },
                                                              value=None)
    with client.read("/pubg/aggregate/agg_match_stats_1.csv") as f:
        temp = pd.read_csv(f, usecols=[1, 3, 4, 9, 12]).replace(to_replace={
            'tpp': 2,
            'fpp': 1
        },
                                                                value=None)
        df = df.append(temp, ignore_index=True)
    with client.read("/pubg/aggregate/agg_match_stats_2.csv") as f:
        temp = pd.read_csv(f, usecols=[1, 3, 4, 9, 12]).replace(to_replace={
            'tpp': 2,
            'fpp': 1
        },
                                                                value=None)
        df = df.append(temp, ignore_index=True)
    with client.read("/pubg/aggregate/agg_match_stats_3.csv") as f:
        temp = pd.read_csv(f, usecols=[1, 3, 4, 9, 12]).replace(to_replace={
            'tpp': 2,
            'fpp': 1
        },
                                                                value=None)
        df = df.append(temp, ignore_index=True)
    with client.read("/pubg/aggregate/agg_match_stats_4.csv") as f:
        temp = pd.read_csv(f, usecols=[1, 3, 4, 9, 12]).replace(to_replace={
            'tpp': 2,
            'fpp': 1
        },
                                                                value=None)
        df = df.append(temp, ignore_index=True)

    #df = pd.read_csv('agg_match_stats_0.csv', usecols=[1, 3, 4, 9, 12], nrows=50000).replace(to_replace={'tpp': 2, 'fpp': 1}, value=None) # local read instead of through HDFS
    print(f'Shape of data read: {df.shape}')

    df = df[df['player_survive_time'] < 2500]  # removing outlier survival times
    if mode == 1:
        X = df[df['match_mode'] == 1].drop(columns=['match_mode']).values.astype('double')
        T = df[df['match_mode'] == 1].iloc[:, 4:].values.astype('double').reshape(-1, 1)
    if mode == 2:
        X = df[df['match_mode'] == 2].drop(columns=['match_mode']).values.astype('double')
        T = df[df['match_mode'] == 2].iloc[:, 4:].values.astype('double').reshape(-1, 1)
    #print(f'X.shape: {X.shape}, T.shape: {T.shape}')

    frac = 0.8
    X_train, X_test, T_train, T_test = ml.partition(X, T, frac)
    train = partition_dataset(np.concatenate((X_train, T_train), axis=1))
    X_train, T_train = train[:, :4], train[:, 4:]

    network = [5]
    relu = True
    n_iterations = 500
    batch_size = 67000
    learn_rate = 10**-5

    Qnet = nn.NN_distributed(X_train.shape[1], network, T_train.shape[1], relu)
    net, err = Qnet.train_pytorch(X_train,
                                  T_train,
                                  n_iterations,
                                  batch_size,
                                  learn_rate,
                                  verbose=True)

    print(
        f'Final Train RMSE error: {err[-1].detach().cpu().numpy()}, training time: {net.time}'
    )
    Y_test = net.use_pytorch(X_test)  # predictions
    RMSE_net = np.sqrt(np.mean(
        (Y_test - T_test)**2))  # errors = predictions - targets
    print(f'Test RMSE: {RMSE_net}')
    print(
        f'Sample Target: {T_test[0][0]}, Predicted Value: {net.use_pytorch(X_test[0])[0]}'
    )  # sample prediction

    model = nn.NN_distributed(X_train.shape[1], network, T_train.shape[1],
                              relu)
    if mode == 1:
        model.load_state_dict(torch.load('Best network (FPP).pth'))
    if mode == 2:
        model.load_state_dict(torch.load('Best network (TPP).pth'))
    Y_test_best = model.use_pytorch(X_test)
    RMSE_model = np.sqrt(np.mean((Y_test_best - T_test)**2))
    print(f'Best network Test RMSE: {RMSE_model}')
    if RMSE_net < RMSE_model / 2:
        n_epochs = len(err)
        fig = plt.figure(figsize=(12, 12))
        plt.plot(list(range(1, n_epochs + 1)), err)
        plt.xlim(1 - 0.05 * n_epochs, n_epochs * 1.05)
        plt.xlabel('Epochs')
        plt.ylabel('Train RMSE')
        if mode == 1:
            torch.save(net.state_dict(), 'Best network (FPP).pth')
            plt.savefig('Error rate - best network (FPP).png')
        if mode == 2:
            torch.save(net.state_dict(), 'Best network (TPP).pth')
            plt.savefig('Error rate - best network (TPP).png')
        print('Saving as new best network')
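
A minimal sketch of invoking this function for a single process; the __main__ guard and the rank and size values are assumptions (the excerpt does not show how they are produced), and mode 1 selects the FPP branch above:

if __name__ == '__main__':
    # rank and size are unused in the excerpt shown; in the full project they
    # presumably come from the distributed launcher.
    run_train(rank=0, size=1, mode=1)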