Example #1
def test_mnist():
    (train_x, train_y), (test_x, test_y) = mnist.load_data()
    val_x = train_x[50000:]
    val_y = train_y[50000:]
    train_x = train_x[:50000]
    train_y = train_y[:50000]
    batch_size = 200
    model = models.Sequential()
    model.add(layers.Linear(28, input_shape=(None, train_x.shape[1])))
    model.add(layers.ReLU())
    model.add(layers.Linear(10))
    model.add(layers.ReLU())
    model.add(layers.Linear(10))
    model.add(layers.Softmax())
    acc = losses.categorical_accuracy.__name__
    model.compile(losses.CrossEntropy(),
                  optimizers.SGD(lr=0.001),
                  metrics=[losses.categorical_accuracy])
    model.summary()
    history = model.train(train_x,
                          train_y,
                          batch_size,
                          epochs=32,
                          validation_data=(val_x, val_y))
    epochs = range(1, len(history["loss"]) + 1)
    plt.plot(epochs, history["loss"], 'ro', label="Training loss")
    plt.plot(epochs, history["val_loss"], 'go', label="Validation loss")
    plt.plot(epochs, history[acc], 'r', label="Training accuracy")
    plt.plot(epochs, history["val_" + acc], 'g', label="Validation accuracy")
    plt.title('Training/validation loss and accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Loss/Accuracy')
    plt.legend()
    plt.show(block=True)
Example #2
    def __init__(self,
                 input_size,
                 output_size,
                 hidden_layers_sizes,
                 loss=mtr.CrossEntropyLoss(),
                 learn_rate=0.01,
                 problem='classification',
                 scorer=mtr.AccuracyScore()):
        """
        Create multi-layer perceptron

        Args:
            input_size: neurons on input layer
            output_size: neurons on output layer
            hidden_layers_sizes: list of neurons on each hidden layer
            problem: problem to solve ('classification" or 'regression')
        """
        layer_sizes = [input_size] + hidden_layers_sizes + [output_size]
        layers = []

        for i in range(len(layer_sizes) - 2):
            layers.append(
                lrs.Dense(layer_sizes[i], layer_sizes[i + 1], learn_rate))
            layers.append(lrs.LeakyReLU())

        layers.append(lrs.Dense(layer_sizes[-2], layer_sizes[-1], learn_rate))

        if problem == 'classification':
            layers.append(lrs.Softmax())

        super().__init__(layers, loss, scorer)
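The constructor chains one Dense + LeakyReLU pair per hidden layer and finishes with a plain Dense (plus Softmax for classification). A standalone sketch of how `layer_sizes` drives that loop (plain Python, independent of the `lrs`/`mtr` modules above):

# Illustrative sketch only: shows which (in, out) pairs the loop above builds layers for.
input_size, output_size = 4, 3
hidden_layers_sizes = [16, 8]
layer_sizes = [input_size] + hidden_layers_sizes + [output_size]   # [4, 16, 8, 3]
pairs = list(zip(layer_sizes[:-1], layer_sizes[1:]))               # [(4, 16), (16, 8), (8, 3)]
# All but the last pair become Dense + LeakyReLU; the last pair becomes the output Dense,
# followed by Softmax when problem == 'classification'.
print(pairs)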
Example #3
def get_experiment_metrics(input_transform, output_transform):
    name_in = input_transform.__class__.__name__ if input_transform is not None else ""
    name_out = output_transform.__class__.__name__ if output_transform is not None else ""

    relative_in_transform = Percentage()
    relative_out_transform = None

    if name_in == "":
        relative_out_transform = Percentage()
    elif name_in == CenterLogRatio.__name__ and name_out != layers.Softmax.__name__:
        relative_out_transform = layers.Softmax()

    return [
        [
            MeanSquaredErrorWrapper(y_true_transformer=input_transform,
                                    y_pred_transformer=None),
            MeanAbsoluteErrorWrapper(y_true_transformer=input_transform,
                                     y_pred_transformer=None),
            MeanAbsolutePercentageErrorWrapper(
                y_true_transformer=relative_in_transform,
                y_pred_transformer=relative_out_transform),
            BrayCurtisDissimilarity(y_true_transformer=relative_in_transform,
                                    y_pred_transformer=relative_out_transform),
            PearsonCorrelation(y_true_transformer=relative_in_transform,
                               y_pred_transformer=relative_out_transform),
            # SpearmanCorrelation(y_true_transformer=relative_in_transform,
            #                    y_pred_transformer=relative_out_transform),
            JensenShannonDivergence(y_true_transformer=relative_in_transform,
                                    y_pred_transformer=relative_out_transform),
            # CrossEntropy(y_true_transformer=relative_in_transform,
            #             y_pred_transformer=relative_out_transform),
        ],
        [
            MeanSquaredErrorWrapper(y_true_transformer=input_transform,
                                    y_pred_transformer=None),
            MeanAbsoluteErrorWrapper(y_true_transformer=input_transform,
                                     y_pred_transformer=None),
            MeanAbsolutePercentageErrorWrapper(
                y_true_transformer=relative_in_transform,
                y_pred_transformer=relative_out_transform),
            BrayCurtisDissimilarity(y_true_transformer=relative_in_transform,
                                    y_pred_transformer=relative_out_transform),
            PearsonCorrelation(y_true_transformer=relative_in_transform,
                               y_pred_transformer=relative_out_transform),
            # SpearmanCorrelation(y_true_transformer=relative_in_transform,
            #                    y_pred_transformer=relative_out_transform),
            JensenShannonDivergence(y_true_transformer=relative_in_transform,
                                    y_pred_transformer=relative_out_transform),
            # CrossEntropy(y_true_transformer=relative_in_transform,
            #             y_pred_transformer=relative_out_transform),
        ],
        [
            metrics.MeanAbsoluteError(name='mae'),
        ]
    ]
Example #4
def test_overfitting(cifar, momentum):
    training = cifar.get_named_batches('data_batch_1').subset(100)

    net = Network()
    net.add_layer(
        layers.Linear(cifar.input_size, 50, 0, initializers.Xavier()))
    net.add_layer(layers.ReLU(50))
    net.add_layer(
        layers.Linear(50, cifar.output_size, 0, initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    opt = MomentumSGD(net, initial_learning_rate=0.005, momentum=momentum)

    # Train and validate on the same 100-sample subset to provoke overfitting
    opt.train(training, training, 400)

    costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                          opt.cost_train, opt.cost_val,
                          'images/overfit_mom{}.png'.format(momentum))
    show_plot('images/overfit_mom{}.png'.format(momentum))
Example #5
def test_vanilla(cifar):
    training = cifar.get_named_batches('data_batch_1')
    validation = cifar.get_named_batches('data_batch_2')

    net = Network()
    net.add_layer(
        layers.Linear(cifar.input_size, cifar.output_size, 0,
                      initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    opt = VanillaSGD(net,
                     initial_learning_rate=0.01,
                     decay_factor=0.99,
                     shuffle=True)

    opt.train(training, validation, 100, 500)

    costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                          opt.cost_train, opt.cost_val,
                          'images/vanilla.png')
    show_plot('images/vanilla.png')
Example #6
if __name__ == '__main__':
    import layers
    import datasets
    import initializers
    import numpy as np
    import matplotlib.pyplot as plt

    cifar = datasets.CIFAR10()
    training = cifar.get_named_batches('data_batch_1', limit=4)

    net = Network()
    net.add_layer(layers.Linear(cifar.input_size, 50, 0,
                                initializers.Xavier()))
    net.add_layer(layers.ReLU(50))
    net.add_layer(
        layers.Linear(50, cifar.output_size, 0, initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    Y = net.evaluate(training.images)
    print('Cost:', net.cost(training.one_hot_labels, None, Y))
    print('Accuracy: {:.2%}'.format(
        net.accuracy(training.one_hot_labels, None, Y)))

    plt.subplot(1, 3, 1)
    plt.imshow(Y)
    plt.yticks(range(10), cifar.labels)
    plt.xlabel('Image number')
    plt.title('Probabilities')

    plt.subplot(1, 3, 2)
    plt.imshow(cifar.label_encoder.transform(np.argmax(Y, axis=0)).T)
    plt.yticks([])
Example #7
    batch_size = 50
    num_epochs = 100
    num_classes = 2
    hidden_units = 100
    hidden_units2 = 10
    dimensions = 2

    # PeaksData, SwissRollData, GMMData
    X_train, y_train, X_test, y_test = utils.get_data('PeaksData')
    X_train, y_train = shuffle(X_train, y_train)

    # gradient and jacobian tests
    grad_test_W(X_train, y_train)
    grad_test_b(X_train, y_train)
    jacobian_test_W(X_train, y_train)
    jacobian_test_b(X_train, y_train)
    grad_test_W_whole_network(X_train, y_train)
    grad_test_b_whole_network(X_train, y_train)

    model = models.MyNeuralNetwork()
    model.add(layers.Linear(dimensions, hidden_units))
    model.add(activations.ReLU())
    model.add(layers.Softmax(hidden_units, 5))
    optimizer = optimizers.SGD(model.parameters, lr=0.1)
    losses, train_accuracy, test_accuracy = model.fit(X_train, y_train, X_test,
                                                      y_test, batch_size,
                                                      num_epochs, optimizer)

    # plotting
    utils.plot_scores(train_accuracy, test_accuracy)
Example #8
def create_and_train(training: Batch,
                     validation: Batch,
                     epochs: int,
                     hidden_size: int,
                     regularization: float,
                     initial_learning_rate: float,
                     decay_factor: float,
                     momentum: float,
                     train_id: str,
                     test: Batch = None):
    """
    Create and train a 2 layer network:
    - subtract mean of the training set
    - linear layer
    - relu
    - linear layer
    - softmax

    The only parameters that are fixed are the layer initializers
    and the batch size.

    :param train_id:
    :param training:
    :param validation:
    :param epochs:
    :param hidden_size:
    :param regularization:
    :param initial_learning_rate:
    :param decay_factor:
    :param momentum:
    :return:
    """
    # Mean of the training set
    mu = training.mean()

    # Definition of the network
    net = Network()
    net.add_layer(layers.BatchNormalization(CIFAR10.input_size, mu))
    net.add_layer(layers.Linear(CIFAR10.input_size, hidden_size, regularization, initializers.Xavier()))
    net.add_layer(layers.ReLU(hidden_size))
    net.add_layer(layers.Linear(hidden_size, CIFAR10.output_size, regularization, initializers.Xavier()))
    net.add_layer(layers.Softmax(CIFAR10.output_size))

    # Training
    opt = optimizers.MomentumSGD(net, initial_learning_rate, decay_factor, True, momentum)
    opt.train(training, validation, epochs, 10000)

    # Plotting
    plot = costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                                 opt.cost_train, opt.cost_val,
                                 'images/{}.png'.format(train_id))

    result = {
        'epochs': epochs,
        'hidden_size': hidden_size,
        'regularization': regularization,
        'initial_learning_rate': initial_learning_rate,
        'decay_factor': decay_factor,
        'momentum': momentum,
        # 'net': net,
        # 'opt': opt,
        'epoch_nums': opt.epoch_nums,
        'cost_train': opt.cost_train,
        'acc_train': opt.acc_train,
        'cost_val': opt.cost_val,
        'acc_val': opt.acc_val,
        'final_cost_train': opt.cost_train[-1],
        'final_acc_train': opt.acc_train[-1],
        'final_cost_val': opt.cost_val[-1],
        'final_acc_val': opt.acc_val[-1],
        'plot': plot
    }

    # Test set
    if test is not None:
        result['final_cost_test'], result['final_acc_test'] = net.cost_accuracy(test)
        result['confusion_matrix'] = confusion_matrix_plot(net, test,
                                                           CIFAR10().labels,
                                                           'images/{}_conf.png'.format(train_id))

    return result
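A minimal call sketch, reusing the CIFAR-10 batch helpers seen in Examples 4-6; the hyperparameter values below are illustrative assumptions, not taken from the original project:

# Hypothetical usage; values chosen only for illustration.
cifar = CIFAR10()
training = cifar.get_named_batches('data_batch_1')
validation = cifar.get_named_batches('data_batch_2')
result = create_and_train(training, validation,
                          epochs=20,
                          hidden_size=50,
                          regularization=0.001,
                          initial_learning_rate=0.01,
                          decay_factor=0.99,
                          momentum=0.9,
                          train_id='demo_run')
print(result['final_acc_val'])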
Example #9
    def __init__(self, *args):
        self.layer = layers.Softmax()
        self.inputs = args
Example #10
def grad_test_b(X_train, y_train):
    softmax_in = 2
    softmax_out = 5
    model = models.MyNeuralNetwork()
    model.add(layers.Softmax(softmax_in, softmax_out))
    model.init()
    for p in model.parameters:
        p.grad = 0.

    eps0 = 1
    eps = np.array([(0.5**i) * eps0 for i in range(10)])

    d = np.random.random((1, 5))
    d = d / np.sum(d)
    grad_diff = []

    x_data = np.array([X_train[0]])
    x_label = np.array([y_train[0]])

    for epss in eps:
        model_grad = copy.deepcopy(model)
        probabilities_grad = model_grad.forward(x_data)
        model2 = copy.deepcopy(model)
        model2.graph[0].bias.data += d * epss
        probabilities_grad2 = model2.forward(x_data)
        grad_diff.append(
            np.abs(
                utils.cross_entropy_loss(probabilities_grad2, x_label) -
                utils.cross_entropy_loss(probabilities_grad, x_label)))

    fig, axs = plt.subplots(2, 2, figsize=(12, 8), constrained_layout=True)
    fig.suptitle('Gradient test by b', fontsize=16)

    axs[0, 0].plot(eps, grad_diff)
    axs[0, 0].set_xlabel(r'$\epsilon$')
    axs[0, 0].set_title(r'$|f(x+\epsilon d) - f(x)|$')

    axs[0, 1].plot(
        range(len(grad_diff) - 1),
        [grad_diff[i + 1] / grad_diff[i] for i in range(len(grad_diff) - 1)])
    axs[0, 1].set_xlabel('$i$')
    axs[0, 1].set_title('rate of decrease')
    axs[0, 1].set_ylim([0, 1])

    grad_diff = []
    for epss in eps:
        model_grad = copy.deepcopy(model)
        probabilities_grad = copy.deepcopy(model_grad.forward(x_data))
        model2 = copy.deepcopy(model)
        model2.graph[0].bias.data += d * epss
        probabilities_grad2 = copy.deepcopy(model2.forward(x_data))
        model2.backward(x_label)
        grad_x = model2.graph[0].bias.grad
        grad_diff.append(
            np.abs(
                utils.cross_entropy_loss(probabilities_grad2, x_label) -
                utils.cross_entropy_loss(probabilities_grad, x_label) -
                epss * np.dot(d.flatten().T, grad_x.flatten())))

    axs[1, 0].plot(eps, grad_diff)
    axs[1, 0].set_xlabel(r'$\epsilon$')
    axs[1, 0].set_title(r'$|f(x+\epsilon d) - f(x) - \epsilon d^{T} grad(x)|$')

    axs[1, 1].plot(
        range(len(grad_diff) - 1),
        [grad_diff[i + 1] / grad_diff[i] for i in range(len(grad_diff) - 1)])
    axs[1, 1].set_xlabel('$i$')
    axs[1, 1].set_title('rate of decrease')
    axs[1, 1].set_ylim([0, 1])

    plt.show()
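For reference, the two quantities plotted above are the standard Taylor-expansion gradient checks (this restates the panel titles in equation form; here f is the cross-entropy loss as a function of the bias b and d is the fixed random direction):

$|f(b + \epsilon d) - f(b)| = O(\epsilon)$
$|f(b + \epsilon d) - f(b) - \epsilon\, d^{T} \nabla_b f(b)| = O(\epsilon^{2})$

Halving $\epsilon$ should therefore roughly halve the first quantity and quarter the second, which is what the "rate of decrease" panels verify (ratios near 0.5 and 0.25, respectively).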
Example #11
def jacobian_test_b(X_train, y_train):
    softmax_in = 2
    softmax_out = 5
    hidden_units = 10
    model = models.MyNeuralNetwork()
    model.add(layers.Linear(softmax_in, hidden_units))
    model.add(activations.Tanh())
    model.add(layers.Softmax(hidden_units, softmax_out))
    model.init()
    for p in model.parameters:
        p.grad = 0.

    eps0 = 1
    eps = np.array([(0.5**i) * eps0 for i in range(10)])

    d = np.random.random((1, 10))
    d = d / np.sum(d)

    x_data = np.array([X_train[0]])
    x_label = np.array([y_train[0]])

    grad_diff = []

    for epss in eps:
        model_grad = copy.deepcopy(model)
        probabilities_grad = model_grad.forward(x_data)
        model2 = copy.deepcopy(model)
        model2.graph[0].bias.data += d * epss
        probabilities_grad2 = model2.forward(x_data)

        f_x_eps_d = model2.graph[1].activation_output
        f_x = model_grad.graph[1].activation_output

        grad_diff.append(LA.norm(f_x_eps_d - f_x))

    fig, axs = plt.subplots(2, 2, figsize=(12, 8), constrained_layout=True)
    fig.suptitle('Jacobian test by b', fontsize=16)

    axs[0, 0].plot(eps, grad_diff)
    axs[0, 0].set_xlabel(r'$\epsilon$')
    axs[0, 0].set_title(r'$||f(x+\epsilon d) - f(x)||$')

    axs[0, 1].plot(
        range(len(grad_diff) - 1),
        [grad_diff[i + 1] / grad_diff[i] for i in range(len(grad_diff) - 1)])
    axs[0, 1].set_xlabel('$i$')
    axs[0, 1].set_title('rate of decrease')
    axs[0, 1].set_ylim([0, 1])

    grad_diff = []
    for epss in eps:

        model_grad = copy.deepcopy(model)
        probabilities_grad = copy.deepcopy(model_grad.forward(x_data))
        model2 = copy.deepcopy(model)
        model2.graph[0].bias.data += d * epss
        probabilities_grad2 = copy.deepcopy(model2.forward(x_data))
        model_grad.backward(x_label)

        f_x_eps_d = model2.graph[1].activation_output
        f_x = model_grad.graph[1].activation_output

        grad = model_grad.graph[0].bias.grad
        JacMV = epss * np.matmul(d.T, grad)

        diff = LA.norm(f_x_eps_d - f_x - JacMV)
        grad_diff.append(diff * epss)

    axs[1, 0].plot(eps, grad_diff)
    axs[1, 0].set_xlabel(r'$\epsilon$')
    axs[1, 0].set_title(r'$||f(x+\epsilon d) - f(x) - JacMV(x, \epsilon d)||$')

    axs[1, 1].plot(
        range(len(grad_diff) - 1),
        [grad_diff[i + 1] / grad_diff[i] for i in range(len(grad_diff) - 1)])
    axs[1, 1].set_xlabel('$i$')
    axs[1, 1].set_title('rate of decrease')
    axs[1, 1].set_ylim([0, 1])

    plt.show()
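As in the gradient test, the Jacobian test is based on the first-order Taylor expansion of the hidden layer's output with respect to the bias (a restatement of the panel titles, with $J_b$ denoting that Jacobian):

$\|f(x + \epsilon d) - f(x)\| = O(\epsilon)$
$\|f(x + \epsilon d) - f(x) - \epsilon\, J_b(x)\, d\| = O(\epsilon^{2})$

so ratios close to 0.5 and 0.25 in the "rate of decrease" panels indicate the expected first- and second-order behaviour.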