Example #1
def test_on_parameters(n_hiddenLayers, n_epochs, showCost):

    # Split data into train and test sections
    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=test_ratio,
                                                        random_state=42)
    print(X.shape, X_train.shape, X_test.shape)
    #Train the network on train data
    params = nn.train_nn(X_train,
                         Y_train,
                         n_hiddenLayers,
                         n_epochs,
                         adaptive_lRate,
                         showCost=showCost)

    #Test neural network on test data
    A2, cache = nn.f_propagate(X_test, params)
    acc = "{:.4f}".format(nn.accuracy(A2, Y_test))
    print('Accuracy on test data: ' + acc + '%')

    #Test neural network on train data
    A2, cache = nn.f_propagate(X_train, params)
    acc = "{:.4f}".format(nn.accuracy(A2, Y_train))
    print('Accuracy on train data: ' + acc + '%')

    # Test neural network on all data
    A2, cache = nn.f_propagate(X, params)
    acc = "{:.4f}".format(nn.accuracy(A2, Y))
    print('Accuracy on all available data: ' + acc + '%')
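
# The snippet above assumes several module-level names (X, Y, test_ratio,
# adaptive_lRate, train_test_split and the nn module) that are defined
# elsewhere in the repository.  A minimal, purely hypothetical setup and
# call might look like this; all values are placeholders, and the module
# name neural_network is an assumption.
import numpy as np
import neural_network as nn  # assumed module providing train_nn, f_propagate, accuracy
from sklearn.model_selection import train_test_split

X = np.random.rand(100, 4)                      # placeholder feature matrix
Y = (np.random.rand(100, 1) > 0.5).astype(int)  # placeholder binary labels
test_ratio = 0.2
adaptive_lRate = {'InitialRate': 0.01, 'DecrementVar': 0.7,
                  'IncrementVar': 1.05, 'ErrorRatio': 1.04}

test_on_parameters(n_hiddenLayers=8, n_epochs=500, showCost=True)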
Example #2
def train():
    print("start training")
    mcts.MCTS.get_tree_and_edges(reset=True)
    print("start training")
    neural_network.nn_predictor.reset_nn_check_pts()
    nn_training_set = None

    iterations = 50

    for iteration in range(iterations):
        print('iteration:', iteration)
        player1 = player.Zero_Player('x',
                                     'Bot_ONE',
                                     nn_type='best',
                                     temperature=1)
        player2 = player.Zero_Player('x',
                                     'Bot_ONE',
                                     nn_type='best',
                                     temperature=1)
        self_play_game = game.Game(player1, player2)
        self_play_results = self_play_game.play(500)
        augmented_self_play_results = neural_network.augment_data_set(
            self_play_results)

        mcts.MCTS.update_mcts_edges(augmented_self_play_results)
        nn_training_set = neural_network.update_nn_training_set(
            self_play_results, nn_training_set)

        neural_network.train_nn(nn_training_set)

        player1 = player.Zero_Player('x',
                                     'Bot_ONE',
                                     nn_type='last',
                                     temperature=0)
        player2 = player.Zero_Player('x',
                                     'Bot_ONE',
                                     nn_type='best',
                                     temperature=0)

        nn_test_game = game.Game(player1, player2)
        wins_player1, wins_player2 = nn_test_game.play_symmetric(100)

        if wins_player1 >= wins_player2:
            neural_network.nn_predictor.BEST = neural_network.nn_predictor.LAST
Example #3
def test_inc_and_dec():

    plotX = []
    plotY = []
    plotZ = []
    size = 10

    incArr = np.linspace(1, 1.1, size)
    decArr = np.linspace(0.5, 1, size)

    for inc in range(0, size):
        for dec in range(0, size):
            acc_arr = []
            adaptive_lRate = {
                'InitialRate': 0.01,
                'DecrementVar': decArr[dec],
                'IncrementVar': incArr[inc],
                'ErrorRatio': 1.04
            }
            for i in range(1, 4):

                X_train, X_test, Y_train, Y_test = train_test_split(
                    X, Y, test_size=test_ratio, random_state=randint(1, 100))
                params = nn.train_nn(X_train,
                                     Y_train,
                                     8,
                                     500,
                                     adaptive_lRate,
                                     showCost=False)

                pred_test, pred_cache = nn.f_propagate(X_test, params)
                acc_test = nn.accuracy(pred_test, Y_test)

                pred_train, train_cache = nn.f_propagate(X_train, params)
                acc_train = nn.accuracy(pred_train, Y_train)

                pred_all, all_cache = nn.f_propagate(X, params)
                acc_all = nn.accuracy(pred_all, Y)

                acc_arr.append((acc_test + acc_train + acc_all) / 3)

            plotX.append(incArr[inc])
            plotY.append(decArr[dec])
            plotZ.append(np.mean(acc_arr))  #plotZ.append(acc_test)
            print('Inc: %f - Dec: %f - Average Acc: %f' %
                  (incArr[inc], decArr[dec], np.mean(acc_arr)))

    fig = plt.figure()
    ax = Axes3D(fig)
    ax.set_xlabel('Increment')
    ax.set_ylabel('Decrement')
    ax.set_zlabel('Accuracy')
    surf = ax.plot_trisurf(plotX, plotY, plotZ, linewidth=0.1, cmap='summer')
    plt.savefig('./plots/plot.png')
    plt.show()
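
# nn.train_nn consumes the adaptive_lRate dictionary, but its source is not
# shown here.  The keys (InitialRate, DecrementVar, IncrementVar, ErrorRatio)
# suggest the classic error-ratio heuristic for adapting the learning rate
# between epochs.  The function below is only a sketch of that heuristic
# under this assumption, not the actual train_nn implementation.
def adapt_learning_rate(lr, cost, prev_cost, schedule):
    """Hypothetical per-epoch update implied by the adaptive_lRate keys."""
    if prev_cost is None:
        return lr                              # first epoch: keep InitialRate
    if cost > prev_cost * schedule['ErrorRatio']:
        return lr * schedule['DecrementVar']   # cost grew too much: shrink the rate
    if cost < prev_cost:
        return lr * schedule['IncrementVar']   # cost improved: grow the rate
    return lr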
Example #4
def test_range__hidden_and_epochs(hidden_start, hidden_end, hidden_step,
                                  epochs_start, epochs_end, epochs_step,
                                  iterate_n):

    plotX = []
    plotY = []
    plotZ = []
    for hidden in range(hidden_start, hidden_end + 1, hidden_step):
        for epoch in range(epochs_start, epochs_end + 1, epochs_step):
            acc_arr = []
            for i in range(1, iterate_n + 1):

                X_train, X_test, Y_train, Y_test = train_test_split(
                    X, Y, test_size=test_ratio, random_state=randint(1, 100))
                params = nn.train_nn(X_train,
                                     Y_train,
                                     hidden,
                                     epoch,
                                     const_lRate,
                                     showCost=False)

                pred_test, pred_cache = nn.f_propagate(X_test, params)
                acc_test = nn.accuracy(pred_test, Y_test)

                pred_train, train_cache = nn.f_propagate(X_train, params)
                acc_train = nn.accuracy(pred_train, Y_train)

                pred_all, all_cache = nn.f_propagate(X, params)
                acc_all = nn.accuracy(pred_all, Y)

                acc_arr.append((acc_test + acc_train + acc_all) / 3)
            plotX.append(hidden)
            plotY.append(epoch)
            plotZ.append(np.mean(acc_arr))  #plotZ.append(acc_test)
            print('Hidden: %i - Epoch: %i - Average Acc: %f' %
                  (hidden, epoch, np.mean(acc_arr)))

    fig = plt.figure()
    ax = Axes3D(fig)
    ax.set_xlabel('Neurons in hidden layer')
    ax.set_ylabel('Epochs')
    ax.set_zlabel('Accuracy')
    surf = ax.plot_trisurf(plotX, plotY, plotZ, linewidth=0.1, cmap='winter')
    plt.savefig('./plots/plot.png')
    plt.show()
Example #5
def test_adaptive_lRate():
    rateAcc = []
    singleAcc = []
    ratesArr = np.linspace(0.001, 0.2, 50)
    for i in range(0, 50):
        for j in range(1, 3):

            adaptive_lRate = {
                'InitialRate': ratesArr[i],
                'DecrementVar': 0.7,
                'IncrementVar': 1.05,
                'ErrorRatio': 1.04
            }

            X_train, X_test, Y_train, Y_test = train_test_split(
                X, Y, test_size=test_ratio, random_state=i * j)
            params = nn.train_nn(X_train,
                                 Y_train,
                                 8,
                                 500,
                                 adaptive_lRate,
                                 showCost=False)

            pred_test, pred_cache = nn.f_propagate(X_test, params)
            acc_test = nn.accuracy(pred_test, Y_test)

            pred_train, train_cache = nn.f_propagate(X_train, params)
            acc_train = nn.accuracy(pred_train, Y_train)

            pred_all, all_cache = nn.f_propagate(X, params)
            acc_all = nn.accuracy(pred_all, Y)

            singleAcc.append(np.mean([acc_test, acc_train, acc_all]))
        rateAcc.append(np.mean(singleAcc))
        print('Initial rate: %f, Acc: %f' % (ratesArr[i], np.mean(singleAcc)))
        singleAcc = []

    plt.plot(ratesArr, rateAcc)
    plt.xlabel('Initial Rate')
    plt.ylabel('Accuracy')
    plt.savefig('./plots/plot.png')
    plt.show()
Example #6
def test_params(n_hidden, n_epochs, n_iterations):
    acc_arr = []
    lowest_test = lowest_train = lowest_all = 100
    incidents = 0

    for i in range(1, n_iterations + 1):
        X_train, X_test, Y_train, Y_test = train_test_split(
            X, Y, test_size=test_ratio, random_state=i)
        params = nn.train_nn(X_train,
                             Y_train,
                             n_hidden,
                             n_epochs,
                             const_lRate,
                             showCost=False)

        pred_test, pred_cache = nn.f_propagate(X_test, params)
        acc_test = nn.accuracy(pred_test, Y_test)

        pred_train, train_cache = nn.f_propagate(X_train, params)
        acc_train = nn.accuracy(pred_train, Y_train)

        pred_all, all_cache = nn.f_propagate(X, params)
        acc_all = nn.accuracy(pred_all, Y)

        if (acc_test < lowest_test): lowest_test = acc_test
        if (acc_train < lowest_train): lowest_train = acc_train
        if (acc_all < lowest_all): lowest_all = acc_all
        if (acc_all < 90 or acc_train < 90 or acc_test < 90): incidents += 1
        acc_arr.append((acc_test + acc_train + acc_all) / 3)
        print('Iteration: %i, Test: %f, Train: %f, All: %f' %
              (i, acc_test, acc_train, acc_all))

    print('================================================')
    print(
        'Average accuracy for all iterations: %f, Number of incidents (<90 acc): %i'
        % (np.mean(acc_arr), incidents))
    print('Lowest accuracies >>> Test: %f, Train: %f, All: %f' %
          (lowest_test, lowest_train, lowest_all))
Example #7
def test_const_lRate(l_start, l_end):
    rateAcc = []
    singleAcc = []
    ratesArr = np.linspace(l_start, l_end, 100)
    for i in range(0, 100):
        for j in range(1, 5):

            X_train, X_test, Y_train, Y_test = train_test_split(
                X, Y, test_size=test_ratio, random_state=i * j)
            params = nn.train_nn(X_train,
                                 Y_train,
                                 8,
                                 500,
                                 ratesArr[i],
                                 showCost=False)

            pred_test, pred_cache = nn.f_propagate(X_test, params)
            acc_test = nn.accuracy(pred_test, Y_test)

            pred_train, train_cache = nn.f_propagate(X_train, params)
            acc_train = nn.accuracy(pred_train, Y_train)

            pred_all, all_cache = nn.f_propagate(X, params)
            acc_all = nn.accuracy(pred_all, Y)

            singleAcc.append(np.mean([acc_test, acc_train, acc_all]))
        rateAcc.append(np.mean(singleAcc))
        print('Learning_rate: %f, Acc: %f' % (ratesArr[i], np.mean(singleAcc)))
        singleAcc = []

    plt.plot(ratesArr, rateAcc)
    plt.xlabel('Learning rate')
    plt.ylabel('Accuracy')

    plt.savefig('./plots/plot.png')
    plt.show()
Example #8
def compare_rates():

    plotX = []
    plotConst = []
    plotAdapt = []

    for epoch in range(0, 400, 10):
        singleConst = []
        singleAdapt = []

        for j in range(1, 5):

            X_train, X_test, Y_train, Y_test = train_test_split(
                X, Y, test_size=test_ratio, random_state=randint(1, 100))

            params_const = nn.train_nn(X_train,
                                       Y_train,
                                       8,
                                       epoch,
                                       const_lRate,
                                       showCost=False)
            params_adapt = nn.train_nn(X_train,
                                       Y_train,
                                       8,
                                       epoch,
                                       adaptive_lRate,
                                       showCost=False)

            pred_test_const, pred_cache = nn.f_propagate(X_test, params_const)
            acc_test_const = nn.accuracy(pred_test_const, Y_test)
            pred_train_const, train_cache = nn.f_propagate(
                X_train, params_const)
            acc_train_const = nn.accuracy(pred_train_const, Y_train)
            pred_all_const, all_cache = nn.f_propagate(X, params_const)
            acc_all_const = nn.accuracy(pred_all_const, Y)

            pred_test_adapt, pred_cache = nn.f_propagate(X_test, params_adapt)
            acc_test_adapt = nn.accuracy(pred_test_adapt, Y_test)
            pred_train_adapt, train_cache = nn.f_propagate(
                X_train, params_adapt)
            acc_train_adapt = nn.accuracy(pred_train_adapt, Y_train)
            pred_all_adapt, all_cache = nn.f_propagate(X, params_adapt)
            acc_all_adapt = nn.accuracy(pred_all_adapt, Y)

            singleConst.append(
                np.mean([acc_test_const, acc_train_const, acc_all_const]))
            singleAdapt.append(
                np.mean([acc_test_adapt, acc_train_adapt, acc_all_adapt]))

        plotX.append(epoch)
        plotConst.append(np.mean(singleConst))
        plotAdapt.append(np.mean(singleAdapt))
        print('Epoch: %f, Const acc: %f, Adaptive acc: %f' %
              (epoch, np.mean(singleConst), np.mean(singleAdapt)))

    plt.plot(plotX, plotConst, label='Constant Learning Rate')
    plt.plot(plotX, plotAdapt, label='Adaptive Learning Rate')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
               loc='lower left',
               ncol=2,
               mode="expand",
               borderaxespad=0.)
    plt.savefig('./plots/plot.png')
    plt.show()
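
# compare_rates relies on two module-level names defined elsewhere:
# const_lRate (passed straight to nn.train_nn, so presumably a plain
# learning-rate value, as in test_const_lRate) and adaptive_lRate (the
# schedule dictionary used in the earlier examples).  Hypothetical values
# and a call:
const_lRate = 0.01
adaptive_lRate = {'InitialRate': 0.01, 'DecrementVar': 0.7,
                  'IncrementVar': 1.05, 'ErrorRatio': 1.04}

compare_rates()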
Example #9
def test_mnist(learning_rate=0.01,
               L1_reg=0.00,
               L2_reg=0.0001,
               n_epochs=100,
               batch_size=128,
               n_hidden=500,
               n_hiddenLayers=3,
               normalization=True,
               eps=1e-4,
               verbose=False,
               smaller_set=True,
               loss='norm',
               lr_decay=False,
               binary=True):
    """
    Wrapper function for training and testing MLP

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: whether to print an epoch summary.

    :type smaller_set: boolean
    :param smaller_set: whether to use the smaller dataset.

    :type loss: string
    :param loss: 'norm' selects the negative log-likelihood loss (plus L1/L2
    regularization); any other value selects the hinge loss.

    :type lr_decay: boolean
    :param lr_decay: whether to apply learning rate decay.

    :type binary: boolean
    :param binary: whether to binarize the output.

    :type normalization: boolean
    :param normalization: whether to normalize the output.

    :type eps: float
    :param eps: epsilon used for normalization.
    """

    # load the dataset; download the dataset if it is not present
    datasets = load_data_mnist(theano_shared=True)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    train_data_y_mat = datasets[3]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels
    y_mat = T.matrix('y_mat')
    epoch = T.lscalar('epoch')

    rng = numpy.random.RandomState(1234)

    # construct a neural network, either MLP or CNN.
    classifier = myMLP(rng=rng,
                       input=x,
                       n_in=28 * 28,
                       n_hidden=n_hidden,
                       n_hiddenLayers=n_hiddenLayers,
                       n_out=10,
                       binary=binary,
                       normalization=normalization,
                       eps=eps)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    # The loss can be chosen as hinge loss or NLL loss
    if loss == 'norm':
        cost = (classifier.negative_log_likelihood(y) +
                L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr)
    else:
        cost = classifier.logRegressionLayer.hinge(y_mat)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
        })

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams

    # According to paper gradient is calculated using binarized weights as
    # same weights are used during forward propagation
    if binary:
        gparams = [T.grad(cost, param) for param in classifier.params_bin]
    else:
        gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    # change learning rate depending upon input
    # we are following exponential decay with
    # learning rate = learning_rate_start * (learning_rate_final / learning_rate_start) ** (epoch/ n_epochs)

    if lr_decay:
        lr_start = learning_rate
        lr_final = 1e-6
        updates = [
            (param_i,
             T.cast(
                 T.clip(
                     param_i - (lr_start *
                                (lr_final / lr_start)**(epoch / 25)) * grad_i,
                     -1, 1), theano.config.floatX))
            for param_i, grad_i in zip(classifier.params, gparams)
        ]
    else:
        updates = [(param_i,
                    T.cast(T.clip(param_i - learning_rate * grad_i, -1, 1),
                           theano.config.floatX))
                   for param_i, grad_i in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`

    # Depending upon lr_decay and the chosen loss we need to pass different
    # parameters to the training model
    if loss == 'norm':
        if lr_decay:
            train_model = theano.function(
                inputs=[index, epoch],
                outputs=cost,
                updates=updates,
                givens={
                    x:
                    train_set_x[index * batch_size:(index + 1) * batch_size],
                    y:
                    train_set_y[index * batch_size:(index + 1) * batch_size],
                })
        else:
            train_model = theano.function(
                inputs=[index],
                outputs=cost,
                updates=updates,
                givens={
                    x:
                    train_set_x[index * batch_size:(index + 1) * batch_size],
                    y:
                    train_set_y[index * batch_size:(index + 1) * batch_size],
                })
    else:
        if lr_decay:
            train_model = theano.function(
                inputs=[index, epoch],
                outputs=cost,
                updates=updates,
                givens={
                    x:
                    train_set_x[index * batch_size:(index + 1) * batch_size],
                    y_mat:
                    train_set_y[index * batch_size:(index + 1) * batch_size],
                })
        else:
            train_model = theano.function(
                inputs=[index],
                outputs=cost,
                updates=updates,
                givens={
                    x:
                    train_set_x[index * batch_size:(index + 1) * batch_size],
                    y_mat:
                    train_set_y[index * batch_size:(index + 1) * batch_size],
                })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    result = train_nn(train_model, validate_model, test_model, n_train_batches,
                      n_valid_batches, n_test_batches, n_epochs, verbose,
                      lr_decay)

    # plot_graph(result[2])
    return result
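
# Hedged usage example for the wrapper above; the argument values are only
# illustrative, and myMLP, load_data_mnist and train_nn are assumed to be
# defined elsewhere in the repository.
result = test_mnist(learning_rate=0.01,
                    n_epochs=100,
                    batch_size=128,
                    n_hidden=500,
                    n_hiddenLayers=3,
                    loss='norm',
                    lr_decay=True,
                    verbose=True)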
Example #10
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
             batch_size=128, n_hidden=500, n_hiddenLayers=3, activation_function=T.tanh,
             verbose=False, data_path='data/mfcc_songs_10_{}.npy'):
    """
    Wrapper function for training and testing MLP

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type activation_function: theano op
    :param activation_function: activation applied in the hidden layers.

    :type verbose: boolean
    :param verbose: whether to print an epoch summary.

    :type data_path: string
    :param data_path: path to the dataset file.
    """

    # load the dataset; download the dataset if it is not present
    datasets = load_data(data_path=data_path)

    train_set_x, train_set_y = datasets[0][0]
    valid_set_x, valid_set_y = datasets[0][1]
    test_set_x, test_set_y = datasets[0][2]

    n_output_neurons = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct a neural network, either MLP or CNN.
    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=1200,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=n_output_neurons,
        activation_function=activation_function
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    return train_nn(train_model, validate_model, test_model,
                    n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
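
# Hedged usage example; myMLP, load_data and train_nn come from the
# surrounding repository, and the default data_path (including its '{}'
# placeholder) is resolved inside load_data.
result = test_mlp(learning_rate=0.01,
                  n_epochs=50,
                  n_hidden=[500, 300, 100],  # list form: one size per hidden layer
                  n_hiddenLayers=3,
                  verbose=True)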
Example #11
def test_mlp(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.0001,
             n_epochs=100,
             batch_size=128,
             n_hidden=500,
             n_hiddenLayers=3,
             activation_function=T.tanh,
             verbose=False,
             data_path='data/mfcc_songs_10_{}.npy'):
    """
    Wrapper function for training and testing MLP

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type activation_function: theano op
    :param activation_function: activation applied in the hidden layers.

    :type verbose: boolean
    :param verbose: whether to print an epoch summary.

    :type data_path: string
    :param data_path: path to the dataset file.
    """

    # load the dataset; download the dataset if it is not present
    datasets = load_data(data_path=data_path)

    train_set_x, train_set_y = datasets[0][0]
    valid_set_x, valid_set_y = datasets[0][1]
    test_set_x, test_set_y = datasets[0][2]

    n_output_neurons = datasets[1]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct a neural network, either MLP or CNN.
    classifier = myMLP(rng=rng,
                       input=x,
                       n_in=1200,
                       n_hidden=n_hidden,
                       n_hiddenLayers=n_hiddenLayers,
                       n_out=n_output_neurons,
                       activation_function=activation_function)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of same size, where each
    # element is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    return train_nn(train_model, validate_model, test_model, n_train_batches,
                    n_valid_batches, n_test_batches, n_epochs, verbose)
Example #12
def test_cifar10(learning_rate=0.01,
                 n_epochs=500,
                 nkerns=[128, 256, 512],
                 filter_shape=3,
                 batch_size=200,
                 verbose=False,
                 normal=False,
                 smaller_set=True,
                 std_normal=2,
                 binary=True,
                 normalization=True,
                 eps=1e-4):
    """
    Wrapper function for testing LeNet on the CIFAR-10 dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type verbose: boolean
    :param verbose: whether to print an epoch summary.

    :type binary: boolean
    :param binary: whether to binarize the output.

    :type normalization: boolean
    :param normalization: whether to normalize the output.

    :type eps: float
    :param eps: epsilon used for normalization.
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data_cifar10(theano_shared=True)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 3, 32, 32),
                                filter_shape=(nkerns[0], 3, filter_shape,
                                              filter_shape),
                                poolsize=(1, 1),
                                normal=normal,
                                std_normal=std_normal,
                                binary=binary,
                                normalization=normalization,
                                eps=eps)
    img_size = (32 - filter_shape + 1)
    layer00 = LeNetConvPoolLayer(rng,
                                 input=layer0.output,
                                 image_shape=(batch_size, nkerns[0], img_size,
                                              img_size),
                                 filter_shape=(nkerns[0], nkerns[0],
                                               filter_shape, filter_shape),
                                 poolsize=(2, 2),
                                 normal=normal,
                                 std_normal=std_normal,
                                 binary=binary,
                                 normalization=normalization,
                                 eps=eps)

    # Construct the second convolutional pooling layer
    img_size = (img_size - filter_shape + 1) // 2
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer00.output,
                                image_shape=(batch_size, nkerns[0], img_size,
                                             img_size),
                                filter_shape=(nkerns[1], nkerns[0],
                                              filter_shape, filter_shape),
                                poolsize=(1, 1),
                                normal=normal,
                                std_normal=std_normal,
                                binary=binary,
                                normalization=normalization,
                                eps=eps)

    img_size = (img_size - filter_shape + 1)
    layer11 = LeNetConvPoolLayer(rng,
                                 input=layer1.output,
                                 image_shape=(batch_size, nkerns[1], img_size,
                                              img_size),
                                 filter_shape=(nkerns[1], nkerns[1],
                                               filter_shape, filter_shape),
                                 poolsize=(2, 2),
                                 normal=normal,
                                 std_normal=std_normal,
                                 binary=binary,
                                 normalization=normalization,
                                 eps=eps)

    img_size = (img_size - filter_shape + 1) // 2
    layer2 = LeNetConvPoolLayer(rng,
                                input=layer11.output,
                                image_shape=(batch_size, nkerns[1], img_size,
                                             img_size),
                                filter_shape=(nkerns[2], nkerns[1],
                                              filter_shape, filter_shape),
                                poolsize=(2, 2),
                                normal=normal,
                                std_normal=std_normal,
                                binary=binary,
                                normalization=normalization,
                                eps=eps)

    # img_size = (img_size - filter_shape + 1)
    # layer22 = LeNetConvPoolLayer(
    #     rng,
    #     input=layer2.output,
    #     image_shape=(batch_size, nkerns[2], img_size, img_size),
    #     filter_shape=(nkerns[2], nkerns[2], filter_shape, filter_shape),
    #     poolsize=(2, 2),
    #     normal=normal,
    #     std_normal=std_normal,
    #     binary=binary
    # )
    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    img_size = (img_size - filter_shape + 1) // 2
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * img_size * img_size,
                         n_out=1024,
                         activation=T.nnet.sigmoid,
                         binary=binary,
                         normalization=normalization,
                         epsilon=eps)

    layer5 = HiddenLayer(rng,
                         input=layer3.output,
                         n_in=1024,
                         n_out=1024,
                         activation=T.nnet.sigmoid,
                         binary=binary,
                         normalization=normalization,
                         epsilon=eps)

    layer6 = HiddenLayer(rng,
                         input=layer5.output,
                         n_in=1024,
                         n_out=1024,
                         activation=T.nnet.sigmoid,
                         binary=binary,
                         normalization=normalization,
                         epsilon=eps)

    # classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer6.output,
                                n_in=1024,
                                n_out=10,
                                binary=binary)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    #create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
    params = params + layer11.params + layer00.params  # + layer22.params
    params = params + layer5.params + layer6.params

    params_bin = layer4.params_bin + layer3.params_bin + layer2.params_bin + layer1.params_bin + layer0.params_bin
    params_bin = params_bin + layer11.params_bin + layer00.params_bin  # + layer22.params_bin
    params_bin = params_bin + layer5.params_bin + layer6.params_bin

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params_bin)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        # (param_i, param_i - learning_rate * grad_i)
        (param_i,
         T.cast(T.clip(param_i - learning_rate * grad_i, -1, 1),
                theano.config.floatX))
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    result = train_nn(train_model, validate_model, test_model, n_train_batches,
                      n_valid_batches, n_test_batches, n_epochs, verbose)

    return result
Example #13
df = fe.read_csv(FILEPATH_X)

df = fe.one_hot_encode(df, ['Formation'])
df = fe.one_hot_encode(df, ['YardLineDirection'])
df = fe.one_hot_encode(df, ['OffenseTeam'])
df = fe.one_hot_encode(df, ['DefenseTeam'])

df = fe.run_pca(df)

labels = fe.read_csv(FILEPATH_Y)

labels = fe.label_encode(labels)

X_train, X_test, y_train, y_test = fe.split_train_test(df, labels, TEST_SIZE)

num_features = fe.get_num_features(X_train)

classifier = nn.build_nn(num_features)
classifier = nn.compile_nn(classifier)
nn.train_nn(classifier, X_train, y_train)

#classifier.save('play_predictor.h5')

#from keras.models import load_model
#import neural_network as nn

#classifier = load_model('play_predictor.h5')
#cm, y_pred = nn.test_nn(classifier, X_test, y_test)
#print(cm)