def mnist_adamax_showdown(hidden_size = 300, n_epochs = 10, n_tests = 20):
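    """
    Compare an MLP trained with plain stochastic gradient descent against the same MLP
    trained with the AdaMax optimizer on MNIST, and return the resulting learning curves.

    :param hidden_size: Size of the hidden layer
    :param n_epochs: Number of epochs to train for
    :param n_tests: Number of points along training at which to evaluate the predictors
    """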

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
            function = MultiLayerPerceptron(
                layer_sizes=[hidden_size, dataset.n_categories],
                input_size = dataset.input_size,
                hidden_activation='sig',
                output_activation='lin',
                w_init = normal_w_init(mag = 0.01, seed = 5)
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = optimizer,
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'sgd': make_mlp(SimpleGradientDescent(eta = 0.1)),
            'adamax': make_mlp(AdaMax(alpha = 1e-3)),
            },
        minibatch_size = 20,
        test_epochs = sqrtspace(0, n_epochs, n_tests),  # Evaluation points spaced evenly in sqrt(epoch), so tests are denser early in training
        evaluation_function = percent_argmax_correct
        )
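
# For reference, a minimal sketch of a single AdaMax parameter update (Kingma & Ba, 2015).
# This is illustrative only -- the AdaMax optimizer used above is assumed to implement the
# same scheme internally.  (Assumes numpy is imported as np, as elsewhere in this module;
# t is the 1-based step count.)
def _adamax_step_sketch(param, grad, m, u, t, alpha = 1e-3, beta1 = 0.9, beta2 = 0.999):
    m = beta1 * m + (1 - beta1) * grad           # Biased first-moment (mean) estimate
    u = np.maximum(beta2 * u, np.abs(grad))      # Exponentially-weighted infinity norm of gradients
    return param - (alpha / (1 - beta1 ** t)) * m / (u + 1e-8), m, u   # Small epsilon avoids division by zero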


def compare_example_predictors(
        n_epochs = 5,
        n_tests = 20,
        minibatch_size = 10,
        test_mode = False
    ):
    """
    This demo shows how we can compare different online and offline predictors.  The demo trains
    each predictor on the MNIST dataset and returns an object containing the learning-curve results.

    :param n_epochs: Number of epochs to train the online predictors for
    :param n_tests: Number of points along training at which to evaluate the predictors
    :param minibatch_size: Minibatch size for the online predictors
    :param test_mode: Set this to True to just run the demo quickly (but not to completion) to see that it doesn't break.
    """

    dataset = get_mnist_dataset(flat = True)
    # "Flatten" the 28x28 inputs to a 784-d vector

    if test_mode:
        # Shorten the dataset so we run through it quickly in test mode.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 3

    # Here we compare three predictors on MNIST - an MLP, a Perceptron, and a Random Forest.
    # - The MLP is defined using Plato's interfaces - we create a Symbolic Predictor (GradientBasedPredictor) and
    #   then compile it into an IPredictor object
    # - The Perceptron directly implements the IPredictor interface.
    # - The Random Forest implements scikit-learn's predictor interface - that is, it has a fit(x, y) and a predict(x) method (see the sketch below).
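    # The offline interface is just the scikit-learn convention: any object with a
    # fit(x, y) and a predict(x) method could be passed in 'offline_predictors' below.
    # A hypothetical minimal example (names are illustrative only, not part of this library):
    #
    #     class ConstantPredictor(object):
    #         def fit(self, x, y):
    #             self._most_common = np.bincount(y).argmax()   # Remember the most common label
    #         def predict(self, x):
    #             return np.full(len(x), self._most_common)     # Always predict that label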
    learning_curve_data = compare_predictors(
        dataset = dataset,
        online_predictors = {
            'Perceptron': Perceptron(
                w = np.zeros((dataset.input_size, dataset.n_categories)),
                alpha = 0.001
                ).to_categorical(n_categories = dataset.n_categories),  # .to_categorical allows the perceptron to be trained on integer labels.
            'MLP': GradientBasedPredictor(
                function = MultiLayerPerceptron(
                    layer_sizes=[500, dataset.n_categories],
                    input_size = dataset.input_size,
                    hidden_activation='sig',  # Sigmoidal hidden units
                    output_activation='softmax',  # Softmax output unit, since we're doing multinomial classification
                    w_init = normal_w_init(mag = 0.01, seed = 5)
                ),
                cost_function = negative_log_likelihood_dangerous,  # "Dangerous" because it doesn't check to see that output is normalized, but we know it is because it comes from softmax.
                optimizer = SimpleGradientDescent(eta = 0.1),
                ).compile(),  # .compile() returns an IPredictor
            },
        offline_predictors={
            'RF': RandomForestClassifier(n_estimators = 40)
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct  # Percentage of samples for which the argmax of the output matches the target label
        )
    # Results is a LearningCurveData object
    return learning_curve_data


def mlp_normalization(hidden_size = 300, n_epochs = 30, n_tests = 50, minibatch_size=20):
    """
    Compare mlps with different schemes for normalizing input.

    regular: Regular vanilla MLP
    normalize: Mean-subtract/normalize over minibatch
    normalize and scale: Mean-subtract/normalize over minibatch AND multiply by a trainable
        (per-unit) scale parameter.

    Conclusions: No significant benefit from the scale parameter.  Normalizing gives
    a head start but incurs a small cost later on.  Overall, all three variants perform quite similarly.

    :param hidden_size: Size of the hidden layer
    :param n_epochs: Number of epochs to train for
    :param n_tests: Number of points along training at which to evaluate the predictors
    :param minibatch_size: Minibatch size
    """
    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda normalize, scale: GradientBasedPredictor(
            function = MultiLayerPerceptron(
                layer_sizes=[hidden_size, dataset.n_categories],
                input_size = dataset.input_size,
                hidden_activation='sig',
                output_activation='lin',
                normalize_minibatch=normalize,
                scale_param=scale,
                w_init = normal_w_init(mag = 0.01, seed = 5)
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = SimpleGradientDescent(eta = 0.1),
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'regular': make_mlp(normalize = False, scale = False),
            'normalize': make_mlp(normalize=True, scale = False),
            'normalize and scale': make_mlp(normalize=True, scale = True),
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )


def test_mlp():
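    """
    Smoke test: check that a small MLP predictor can be built, compiled and trained
    without breaking, using assert_online_predictor_not_broken.
    """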

    assert_online_predictor_not_broken(
        predictor_constructor = lambda n_dim_in, n_dim_out:
            GradientBasedPredictor(
                function = MultiLayerPerceptron(
                    layer_sizes = [100, n_dim_out],
                    input_size = n_dim_in,
                    output_activation='softmax',
                    w_init = lambda n_in, n_out, rng = np.random.RandomState(3252): 0.1*rng.randn(n_in, n_out)
                    ),
                cost_function=negative_log_likelihood_dangerous,
                optimizer=SimpleGradientDescent(eta = 0.1),
                ).compile(),
        categorical_target=True,
        minibatch_size=10,
        n_epochs=2
        )


def demo_mnist_mlp(test_mode = False):
    """
    Train an MLP on MNIST and print the test scores as training progresses.
    """

    if test_mode:
        test_period = 200
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        test_period = 1000
        minibatch_size = 20
        n_epochs = 10
        dataset = get_mnist_dataset()

    # Setup the training and test functions
    classifier = MultiLayerPerceptron(
        layer_sizes=[500, 10],
        input_size = 784,
        hidden_activation='sig',
        output_activation='softmax',
        w_init = normal_w_init(mag = 0.01)
        )
    training_cost_function = normalized_negative_log_likelihood
    optimizer = SimpleGradientDescent(eta = 0.1)
    training_function = SupervisedTrainingFunction(classifier, training_cost_function, optimizer).compile()
    test_cost_function = percent_correct
    test_function = SupervisedTestFunction(classifier, test_cost_function).compile()

    def report_test(i):
        training_score = test_function(dataset.training_set.input, dataset.training_set.target)
        print('Training score at iteration %s: %s' % (i, training_score))
        test_score = test_function(dataset.test_set.input, dataset.test_set.target)
        print('Test score at iteration %s: %s' % (i, test_score))

    # Train and periodically report the test score.
    print('Running MLP on MNIST Dataset...')
    for i, (_, image_minibatch, label_minibatch) in enumerate(dataset.training_set.minibatch_iterator(minibatch_size = minibatch_size, epochs = n_epochs, single_channel = True)):
        if i % test_period == 0:
            report_test(i)
        training_function(image_minibatch, label_minibatch)
    report_test('Final')
    print('...Done.')


def test_param_serialization():
    """
    Check that a trained predictor's parameters can be serialized to a string with
    dumps_params and loaded back into a freshly-constructed predictor with loads_params,
    reproducing the trained performance.
    """

    dataset = get_synthetic_clusters_dataset()

    predictor_constructor = lambda: GradientBasedPredictor(
        function = MultiLayerPerceptron(
            layer_sizes = [100, dataset.n_categories],
            input_size = dataset.input_shape[0],
            output_activation='softmax',
            w_init = lambda n_in, n_out, rng = np.random.RandomState(3252): 0.1*rng.randn(n_in, n_out)
            ),
        cost_function = negative_log_likelihood_dangerous,
        optimizer = SimpleGradientDescent(eta = 0.1),
        ).compile()

    evaluate = lambda pred: evaluate_predictor(pred, dataset.test_set, percent_argmax_correct)

    # Train up predictor and save params
    predictor = predictor_constructor()
    pre_training_score = evaluate(predictor)
    assert pre_training_score < 35
    train_online_predictor(predictor,
                           dataset.training_set,
                           minibatch_size=20,
                           n_epochs=3)
    post_training_score = evaluate(predictor)
    assert post_training_score > 95
    trained_param_string = dumps_params(predictor)

    # Instantiate new predictor and load params
    new_predictor = predictor_constructor()
    new_pre_training_score = evaluate(new_predictor)
    assert new_pre_training_score < 35
    loads_params(new_predictor, trained_param_string)
    loaded_score = evaluate(new_predictor)
    assert loaded_score == post_training_score > 95