Example no. 1
def demo_perceptron_dtp(
        hidden_sizes = [240],
        n_epochs = 20,
        n_tests = 20,
        minibatch_size=100,
        lin_dtp = True,
        ):

    dataset = get_mnist_dataset(flat = True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    predictor = DifferenceTargetMLP(
        layers=[PerceptronLayer.from_initializer(n_in, n_out, initial_mag=2, lin_dtp = lin_dtp)
                for n_in, n_out in zip([dataset.input_size]+hidden_sizes, hidden_sizes+[dataset.target_size])],
        output_cost_function = None
        ).compile()

    result = assess_online_predictor(
        predictor = predictor,
        dataset = dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        )

    plot_learning_curves(result)
Example no. 2
def demo_compare_dtp_methods(
        predictor_constructors,
        n_epochs = 10,
        minibatch_size = 20,
        n_tests = 20,
        onehot = True,
        accumulator = None
        ):
    dataset = get_mnist_dataset(flat = True, binarize = False)
    n_categories = dataset.n_categories
    if onehot:
        dataset = dataset.to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors = {name: p(dataset.input_size, n_categories) for name, p in predictor_constructors.iteritems() if name in predictor_constructors},
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct,
        # online_test_callbacks={'perceptron': lambda p: dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))},
        accumulators=accumulator
        )

    plot_learning_curves(learning_curves)
Example no. 3
def demo_perceptron_dtp(
    hidden_sizes=[240],
    n_epochs=20,
    n_tests=20,
    minibatch_size=100,
    lin_dtp=True,
):

    dataset = get_mnist_dataset(flat=True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    predictor = DifferenceTargetMLP(layers=[
        PerceptronLayer.from_initializer(n_in,
                                         n_out,
                                         initial_mag=2,
                                         lin_dtp=lin_dtp)
        for n_in, n_out in zip([dataset.input_size] +
                               hidden_sizes, hidden_sizes +
                               [dataset.target_size])
    ],
                                    output_cost_function=None).compile()

    result = assess_online_predictor(
        predictor=predictor,
        dataset=dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_tests),
    )

    plot_learning_curves(result)
Example no. 4
def mnist_adamax_showdown(hidden_size = 300, n_epochs = 10, n_tests = 20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
            function = MultiLayerPerceptron.from_init(
                layer_sizes=[dataset.input_size, hidden_size, dataset.n_categories],
                hidden_activation='sig',
                output_activation='lin',
                w_init = 0.01,
                rng = 5
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = optimizer,
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'sgd': make_mlp(SimpleGradientDescent(eta = 0.1)),
            'adamax': make_mlp(AdaMax(alpha = 1e-3)),
            },
        minibatch_size = 20,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
Example no. 5
def mnist_adamax_showdown(hidden_size = 300, n_epochs = 10, n_tests = 20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
            function = MultiLayerPerceptron(
                layer_sizes=[hidden_size, dataset.n_categories],
                input_size = dataset.input_size,
                hidden_activation='sig',
                output_activation='lin',
                w_init = normal_w_init(mag = 0.01, seed = 5)
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = optimizer,
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'sgd': make_mlp(SimpleGradientDescent(eta = 0.1)),
            'adamax': make_mlp(AdaMax(alpha = 1e-3)),
            },
        minibatch_size = 20,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
Example no. 6
def demo_compare_dtp_methods(predictor_constructors,
                             n_epochs=10,
                             minibatch_size=20,
                             n_tests=20,
                             onehot=True,
                             accumulator=None):
    dataset = get_mnist_dataset(flat=True, binarize=False)
    n_categories = dataset.n_categories
    if onehot:
        dataset = dataset.to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors={
            name: p(dataset.input_size, n_categories)
            for name, p in predictor_constructors.iteritems()
            if name in predictor_constructors
        },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
        # online_test_callbacks={'perceptron': lambda p: dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))},
        accumulators=accumulator)

    plot_learning_curves(learning_curves)
Example no. 7
def test_compare_predictors(hang_plot=False):

    dataset = get_synthetic_clusters_dataset()

    w_constructor = lambda rng=np.random.RandomState(45): .1 * rng.randn(
        dataset.input_shape[0], dataset.n_categories)
    records = compare_predictors(
        dataset=dataset,
        offline_predictors={'SVM': SVC()},
        online_predictors={
            'fast-perceptron':
            Perceptron(alpha=0.1, w=w_constructor()).to_categorical(),
            'slow-perceptron':
            Perceptron(alpha=0.001, w=w_constructor()).to_categorical()
        },
        minibatch_size=10,
        test_epochs=sqrtspace(0, 10, 20),
        evaluation_function='percent_correct')

    assert 99 < records['SVM'].get_scores('Test') <= 100
    assert 20 < records['slow-perceptron'].get_scores(
        'Test')[0] < 40 and 95 < records['slow-perceptron'].get_scores(
            'Test')[-1] <= 100
    assert 20 < records['fast-perceptron'].get_scores(
        'Test')[0] < 40 and 98 < records['fast-perceptron'].get_scores(
            'Test')[-1] <= 100

    plot_learning_curves(records, hang=hang_plot)
Example no. 8
def demo_dtp_varieties(hidden_sizes=[240],
                       n_epochs=10,
                       minibatch_size=20,
                       n_tests=20,
                       hidden_activation='tanh',
                       output_activation='sigm',
                       optimizer='adamax',
                       learning_rate=0.01,
                       noise=1,
                       predictors=['MLP', 'DTP', 'PreAct-DTP', 'Linear-DTP'],
                       rng=1234,
                       use_bias=True,
                       live_plot=False,
                       plot=False):
    """
    ;

    :param hidden_sizes:
    :param n_epochs:
    :param minibatch_size:
    :param n_tests:
    :return:
    """
    if isinstance(predictors, str):
        predictors = [predictors]

    dataset = get_mnist_dataset(flat=True)
    dataset = dataset.process_with(
        targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    predictors = OrderedDict(
        (name,
         get_predictor(name,
                       input_size=dataset.input_size,
                       target_size=dataset.target_size,
                       hidden_sizes=hidden_sizes,
                       hidden_activation=hidden_activation,
                       output_activation=output_activation,
                       optimizer=optimizer,
                       learning_rate=learning_rate,
                       noise=noise,
                       use_bias=use_bias,
                       rng=rng)) for name in predictors)

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors=predictors,
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
    )

    if plot:
        plot_learning_curves(learning_curves)
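As a brief usage note (the argument values below are illustrative, not from the original demo), `predictors` may be passed either as a single name or as a list, since the function wraps a bare string into a list before building the predictor dictionary:

# Hypothetical calls to the demo above; values are illustrative only.
demo_dtp_varieties(predictors='DTP', n_epochs=10, plot=True)
demo_dtp_varieties(predictors=['MLP', 'Linear-DTP'], hidden_activation='tanh', plot=True)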
Example no. 9
def compare_example_predictors(
    n_epochs=5,
    n_tests=20,
    minibatch_size=10,
):
    """
    This demo shows how we can compare different online predictors.  The demo trains both predictors on the dataset,
    returning an object that contains the results.

    :param test_mode: Set this to True to just run the demo quicky (but not to completion) to see that it doesn't break.
    """

    dataset = get_mnist_dataset(flat=True)
    # "Flatten" the 28x28 inputs to a 784-d vector

    if is_test_mode():
        # Shorten the dataset so we run through it quickly in test mode.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 3

    # Here we compare three predictors on MNIST - an MLP, a Perceptron, and a Random Forest.
    # - The MLP is defined using Plato's interfaces - we create a Symbolic Predictor (GradientBasedPredictor) and
    #   then compile it into an IPredictor object.
    # - The Perceptron directly implements the IPredictor interface.
    # - The Random Forest implements scikit-learn's predictor interface - that is, it has a fit(x, y) and a predict(x) method.
    learning_curve_data = compare_predictors(
        dataset=dataset,
        online_predictors={
            'Perceptron':
            Perceptron(w=np.zeros((dataset.input_size, dataset.n_categories)),
                       alpha=0.001).
            to_categorical(
                n_categories=dataset.n_categories
            ),  # .to_categorical allows the perceptron to be trained on integer labels.
            'MLP':
            GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[
                        dataset.input_size, 500, dataset.n_categories
                    ],
                    hidden_activation='sig',  # Sigmoidal hidden units
                    output_activation=
                    'softmax',  # Softmax output unit, since we're doing multinomial classification
                    w_init=0.01,
                    rng=5),
                cost_function=
                negative_log_likelihood_dangerous,  # "Dangerous" because it doesn't check to see that output is normalized, but we know it is because it comes from softmax.
                optimizer=SimpleGradientDescent(eta=0.1),
            ).compile(),  # .compile() returns an IPredictor
        },
        offline_predictors={'RF': RandomForestClassifier(n_estimators=40)},
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct  # Compares one-hot
    )
    # The result is a LearningCurveData object
    return learning_curve_data
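A short, hypothetical driver for the demo above (plot_learning_curves is the same helper used in the other examples; argument values are illustrative):

# Run the comparison and display the learning curves.
# compare_example_predictors returns a LearningCurveData object, which
# plot_learning_curves knows how to render.
if __name__ == '__main__':
    learning_curve_data = compare_example_predictors(n_epochs=5, n_tests=20, minibatch_size=10)
    plot_learning_curves(learning_curve_data)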
Example no. 10
def compare_example_predictors(
        n_epochs = 5,
        n_tests = 20,
        minibatch_size = 10,
    ):
    """
    This demo shows how we can compare different online predictors.  The demo trains both predictors on the dataset,
    returning an object that contains the results.

    :param test_mode: Set this to True to just run the demo quicky (but not to completion) to see that it doesn't break.
    """

    dataset = get_mnist_dataset(flat = True)
    # "Flatten" the 28x28 inputs to a 784-d vector

    if is_test_mode():
        # Shorten the dataset so we run through it quickly in test mode.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 3

    # Here we compare three predictors on MNIST - an MLP, a Perceptron, and a Random Forest.
    # - The MLP is defined using Plato's interfaces - we create a Symbolic Predictor (GradientBasedPredictor) and
    #   then compile it into an IPredictor object.
    # - The Perceptron directly implements the IPredictor interface.
    # - The Random Forest implements scikit-learn's predictor interface - that is, it has a fit(x, y) and a predict(x) method.
    learning_curve_data = compare_predictors(
        dataset = dataset,
        online_predictors = {
            'Perceptron': Perceptron(
                w = np.zeros((dataset.input_size, dataset.n_categories)),
                alpha = 0.001
                ).to_categorical(n_categories = dataset.n_categories),  # .to_categorical allows the perceptron to be trained on integer labels.
            'MLP': GradientBasedPredictor(
                function = MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size, 500, dataset.n_categories],
                    hidden_activation='sig',  # Sigmoidal hidden units
                    output_activation='softmax',  # Softmax output unit, since we're doing multinomial classification
                    w_init = 0.01,
                    rng = 5
                ),
                cost_function = negative_log_likelihood_dangerous,  # "Dangerous" because it doesn't check to see that output is normalized, but we know it is because it comes from softmax.
                optimizer = SimpleGradientDescent(eta = 0.1),
                ).compile(),  # .compile() returns an IPredictor
            },
        offline_predictors={
            'RF': RandomForestClassifier(n_estimators = 40)
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct  # Compares one-hot
        )
    # The result is a LearningCurveData object
    return learning_curve_data
Example no. 11
def demo_compare_dtp_optimizers(
    hidden_sizes=[240],
    n_epochs=10,
    minibatch_size=20,
    n_tests=20,
    hidden_activation='tanh',
):

    dataset = get_mnist_dataset(flat=True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    def make_dtp_net(optimizer_constructor, output_fcn):
        return DifferenceTargetMLP.from_initializer(
            input_size=dataset.input_size,
            output_size=dataset.target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=optimizer_constructor,
            input_activation='sigm',
            hidden_activation=hidden_activation,
            output_activation=output_fcn,
            w_init_mag=0.01,
            noise=1,
        ).compile()

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors={
            'SGD-0.001-softmax':
            make_dtp_net(lambda: SimpleGradientDescent(0.001),
                         output_fcn='softmax'),
            'AdaMax-0.001-softmax':
            make_dtp_net(lambda: AdaMax(0.001), output_fcn='softmax'),
            'RMSProp-0.001-softmax':
            make_dtp_net(lambda: RMSProp(0.001), output_fcn='softmax'),
            'SGD-0.001-sigm':
            make_dtp_net(lambda: SimpleGradientDescent(0.001),
                         output_fcn='sigm'),
            'AdaMax-0.001-sigm':
            make_dtp_net(lambda: AdaMax(0.001), output_fcn='sigm'),
            'RMSProp-0.001-sigm':
            make_dtp_net(lambda: RMSProp(0.001), output_fcn='sigm'),
        },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
    )

    plot_learning_curves(learning_curves)
Example no. 12
def demo_run_dtp_on_mnist(
        hidden_sizes = [240],
        n_epochs = 20,
        n_tests = 20,
        minibatch_size=100,
        input_activation = 'sigm',
        hidden_activation = 'tanh',
        output_activation = 'softmax',
        optimizer_constructor = lambda: RMSProp(0.001),
        normalize_inputs = False,
        local_cost_function = mean_squared_error,
        output_cost_function = None,
        noise = 1,
        lin_dtp = False,
        seed = 1234
        ):

    dataset = get_mnist_dataset(flat = True).to_onehot()
    if normalize_inputs:
        dataset = dataset.process_with(targets_processor=multichannel(lambda x: x/np.sum(x, axis = 1, keepdims=True)))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    predictor = DifferenceTargetMLP.from_initializer(
            input_size = dataset.input_size,
            output_size = dataset.target_size,
            hidden_sizes = hidden_sizes,
            optimizer_constructor = optimizer_constructor,  # Note that RMSProp/AdaMax way outperform SGD here.
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init_mag=0.01,
            output_cost_function=output_cost_function,
            noise = noise,
            cost_function = local_cost_function,
            layer_constructor=DifferenceTargetLayer.from_initializer if not lin_dtp else PreActivationDifferenceTargetLayer.from_initializer,
            rng = seed
            ).compile()

    result = assess_online_predictor(
        predictor = predictor,
        dataset = dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        test_callback=lambda p: dbplot(p.symbolic_predictor.layers[0].w.get_value().T.reshape(-1, 28, 28))
        )

    plot_learning_curves(result)
Example no. 13
def demo_run_dtp_on_mnist(hidden_sizes=[240],
                          n_epochs=20,
                          n_tests=20,
                          minibatch_size=100,
                          input_activation='sigm',
                          hidden_activation='tanh',
                          output_activation='softmax',
                          optimizer_constructor=lambda: RMSProp(0.001),
                          normalize_inputs=False,
                          local_cost_function=mean_squared_error,
                          output_cost_function=None,
                          noise=1,
                          lin_dtp=False,
                          seed=1234):

    dataset = get_mnist_dataset(flat=True).to_onehot()
    if normalize_inputs:
        dataset = dataset.process_with(targets_processor=multichannel(
            lambda x: x / np.sum(x, axis=1, keepdims=True)))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    predictor = DifferenceTargetMLP.from_initializer(
        input_size=dataset.input_size,
        output_size=dataset.target_size,
        hidden_sizes=hidden_sizes,
        optimizer_constructor=
        optimizer_constructor,  # Note that RMSProp/AdaMax way outperform SGD here.
        # input_activation=input_activation,
        hidden_activation=hidden_activation,
        output_activation=output_activation,
        w_init_mag=0.01,
        output_cost_function=output_cost_function,
        noise=noise,
        cost_function=local_cost_function,
        layer_constructor=DifferenceTargetLayer.from_initializer if not lin_dtp
        else PreActivationDifferenceTargetLayer.from_initializer,
        rng=seed).compile()

    result = assess_online_predictor(
        predictor=predictor,
        dataset=dataset,
        minibatch_size=minibatch_size,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        test_callback=lambda p: dbplot(p.symbolic_predictor.layers[0].w.
                                       get_value().T.reshape(-1, 28, 28)))

    plot_learning_curves(result)
Example no. 14
def demo_dtp_varieties(
        hidden_sizes = [240],
        n_epochs = 10,
        minibatch_size = 20,
        n_tests = 20,
        hidden_activation = 'tanh',
        output_activation = 'sigm',
        optimizer = 'adamax',
        learning_rate = 0.01,
        noise = 1,
        predictors = ['MLP', 'DTP', 'PreAct-DTP', 'Linear-DTP'],
        rng = 1234,
        live_plot = False,
        plot = False
        ):
    """
    ;

    :param hidden_sizes:
    :param n_epochs:
    :param minibatch_size:
    :param n_tests:
    :return:
    """
    if isinstance(predictors, str):
        predictors = [predictors]

    dataset = get_mnist_dataset(flat = True)
    dataset = dataset.process_with(targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))
    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    predictors = OrderedDict((name, get_predictor(name, input_size = dataset.input_size, target_size=dataset.target_size,
            hidden_sizes=hidden_sizes, hidden_activation=hidden_activation, output_activation = output_activation,
            optimizer=optimizer, learning_rate=learning_rate, noise = noise, rng = rng)) for name in predictors)

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors = predictors,
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct,
        )

    if plot:
        plot_learning_curves(learning_curves)
Example no. 15
def mlp_normalization(hidden_size=300,
                      n_epochs=30,
                      n_tests=50,
                      minibatch_size=20):
    """
    Compare MLPs with different schemes for normalizing their input.

    regular: Regular vanilla MLP
    normalize: Mean-subtract/normalize over minibatch
    normalize and scale: Mean-subtract/normalize over minibatch AND multiply by a trainable
        (per-unit) scale parameter.

    Conclusions: No significant benefit to scale parameter.  Normalizing gives
    a head start but incurs a small cost later on.  But really all classifiers are quite similar.

    :param hidden_size: Size of hidden layer
    """
    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda normalize, scale: GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(layer_sizes=[
            dataset.input_size, hidden_size, dataset.n_categories
        ],
                                                hidden_activation='sig',
                                                output_activation='lin',
                                                normalize_minibatch=normalize,
                                                scale_param=scale,
                                                w_init=0.01,
                                                rng=5),
        cost_function=softmax_negative_log_likelihood,
        optimizer=SimpleGradientDescent(eta=0.1),
    ).compile()

    return compare_predictors(dataset=dataset,
                              online_predictors={
                                  'regular':
                                  make_mlp(normalize=False, scale=False),
                                  'normalize':
                                  make_mlp(normalize=True, scale=False),
                                  'normalize and scale':
                                  make_mlp(normalize=True, scale=True),
                              },
                              minibatch_size=minibatch_size,
                              test_epochs=sqrtspace(0, n_epochs, n_tests),
                              evaluation_function=percent_argmax_correct)
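The normalize_minibatch/scale_param behaviour described in the docstring lives inside MultiLayerPerceptron; purely as an illustration of what "mean-subtract/normalize over minibatch" with an optional trainable per-unit scale means, here is a minimal NumPy sketch (an assumption about the mechanism, not the library's actual code):

import numpy as np

def normalize_minibatch_sketch(x, scale=None, eps=1e-8):
    # x: (minibatch_size, n_units) array of pre-activations.
    # Mean-subtract and normalize each unit over the minibatch, then
    # optionally multiply by a per-unit scale parameter ('normalize and scale').
    x = x - x.mean(axis=0, keepdims=True)
    x = x / (x.std(axis=0, keepdims=True) + eps)
    if scale is not None:
        x = x * scale  # scale: (n_units,) trainable parameter
    return x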
Example no. 16
def mlp_normalization(hidden_size = 300, n_epochs = 30, n_tests = 50, minibatch_size=20):
    """
    Compare MLPs with different schemes for normalizing their input.

    regular: Regular vanilla MLP
    normalize: Mean-subtract/normalize over minibatch
    normalize and scale: Mean-subtract/normalize over minibatch AND multiply by a trainable
        (per-unit) scale parameter.

    Conclusions: No significant benefit to scale parameter.  Normalizing gives
    a head start but incurs a small cost later on.  But really all classifiers are quite similar.

    :param hidden_size: Size of hidden layer
    """
    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda normalize, scale: GradientBasedPredictor(
            function = MultiLayerPerceptron.from_init(
                layer_sizes=[dataset.input_size, hidden_size, dataset.n_categories],
                hidden_activation='sig',
                output_activation='lin',
                normalize_minibatch=normalize,
                scale_param=scale,
                w_init = 0.01,
                rng = 5
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = SimpleGradientDescent(eta = 0.1),
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'regular': make_mlp(normalize = False, scale = False),
            'normalize': make_mlp(normalize=True, scale = False),
            'normalize and scale': make_mlp(normalize=True, scale = True),
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
Example no. 17
def demo_mnist_online_regression(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        regressor_type = 'multinomial',
        n_epochs = 20,
        n_test_points = 30,
        max_training_samples = None,
        include_biases = True,
        ):
    """
    Train an online regressor on MNIST and print the test scores as training progresses.
    """

    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30, flat = True)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples, flat = True)

    assert regressor_type in ('multinomial', 'logistic', 'linear')

    n_outputs = dataset.n_categories
    if regressor_type in ('logistic', 'linear'):
        dataset = dataset.to_onehot()

    predictor = OnlineRegressor(
        input_size = dataset.input_size,
        output_size = n_outputs,
        regressor_type = regressor_type,
        optimizer=get_named_optimizer(name = optimizer, learning_rate=learning_rate),
        include_biases = include_biases
        ).compile()

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size
    )

    plot_learning_curves(results)
Example no. 19
def backprop_vs_difference_target_prop(hidden_sizes=[240],
                                       n_epochs=10,
                                       minibatch_size=20,
                                       n_tests=20):

    dataset = get_mnist_dataset(flat=True)
    dataset = dataset.process_with(
        targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    return compare_predictors(
        dataset=dataset,
        online_predictors={
            'backprop-mlp':
            GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size] + hidden_sizes +
                    [dataset.n_categories],
                    hidden_activation='tanh',
                    output_activation='sig',
                    w_init=0.01,
                    rng=5),
                cost_function=mean_squared_error,
                optimizer=AdaMax(0.01),
            ).compile(),
            'difference-target-prop-mlp':
            DifferenceTargetMLP.from_initializer(
                input_size=dataset.input_size,
                output_size=dataset.target_size,
                hidden_sizes=hidden_sizes,
                optimizer_constructor=lambda: AdaMax(0.01),
                w_init=0.01,
                noise=1,
            ).compile()
        },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
    )
Example no. 20
def demo_compare_dtp_optimizers(
        hidden_sizes = [240],
        n_epochs = 10,
        minibatch_size = 20,
        n_tests = 20,
        hidden_activation = 'tanh',
        ):

    dataset = get_mnist_dataset(flat = True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    def make_dtp_net(optimizer_constructor, output_fcn):
        return DifferenceTargetMLP.from_initializer(
            input_size = dataset.input_size,
            output_size = dataset.target_size,
            hidden_sizes = hidden_sizes,
            optimizer_constructor = optimizer_constructor,
            input_activation='sigm',
            hidden_activation=hidden_activation,
            output_activation=output_fcn,
            w_init_mag=0.01,
            noise = 1,
            ).compile()

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors = {
            'SGD-0.001-softmax': make_dtp_net(lambda: SimpleGradientDescent(0.001), output_fcn = 'softmax'),
            'AdaMax-0.001-softmax': make_dtp_net(lambda: AdaMax(0.001), output_fcn = 'softmax'),
            'RMSProp-0.001-softmax': make_dtp_net(lambda: RMSProp(0.001), output_fcn = 'softmax'),
            'SGD-0.001-sigm': make_dtp_net(lambda: SimpleGradientDescent(0.001), output_fcn = 'sigm'),
            'AdaMax-0.001-sigm': make_dtp_net(lambda: AdaMax(0.001), output_fcn = 'sigm'),
            'RMSProp-0.001-sigm': make_dtp_net(lambda: RMSProp(0.001), output_fcn = 'sigm'),
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct,
        )

    plot_learning_curves(learning_curves)
Example no. 21
def backprop_vs_difference_target_prop(
        hidden_sizes = [240],
        n_epochs = 10,
        minibatch_size = 20,
        n_tests = 20
        ):

    dataset = get_mnist_dataset(flat = True)
    dataset = dataset.process_with(targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'backprop-mlp': GradientBasedPredictor(
                function = MultiLayerPerceptron.from_init(
                layer_sizes=[dataset.input_size]+hidden_sizes+[dataset.n_categories],
                    hidden_activation='tanh',
                    output_activation='sig',
                    w_init = 0.01,
                    rng = 5
                    ),
                cost_function = mean_squared_error,
                optimizer = AdaMax(0.01),
                ).compile(),
            'difference-target-prop-mlp': DifferenceTargetMLP.from_initializer(
                input_size = dataset.input_size,
                output_size = dataset.target_size,
                hidden_sizes = hidden_sizes,
                optimizer_constructor = lambda: AdaMax(0.01),
                w_init=0.01,
                noise = 1,
            ).compile()
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct,
        )
Example no. 22
def test_compare_predictors_old():

    x_tr, y_tr, x_ts, y_ts, w_true = get_logistic_regression_data(noise_factor = 0.1)
    dataset = DataSet(DataCollection(x_tr, y_tr), DataCollection(x_ts, y_ts)).process_with(targets_processor=lambda (x, ): (OneHotEncoding()(x[:, 0]), ))

    w_init = 0.1*np.random.randn(dataset.training_set.input.shape[1], dataset.training_set.target.shape[1])
    records = compare_predictors_old(
        dataset = dataset,
        offline_predictor_constructors={
            'Optimal': lambda: MockPredictor(lambda x: sigm(x.dot(w_true)))
            },
        online_predictor_constructors={
            'fast-perceptron': lambda: Perceptron(alpha = 0.1, w = w_init.copy()),
            'slow-perceptron': lambda: Perceptron(alpha = 0.001, w = w_init.copy())
            },
        minibatch_size = 10,
        test_points = sqrtspace(0, 10, 20),
        evaluation_function='mse'
        )
    plot_learning_curves(records, hang = False)
Example no. 23
def test_compare_predictors(hang_plot = False):

    dataset = get_synthetic_clusters_dataset()

    w_constructor = lambda rng = np.random.RandomState(45): .1*rng.randn(dataset.input_shape[0], dataset.n_categories)
    records = compare_predictors(
        dataset = dataset,
        offline_predictors={
            'SVM': SVC()
            },
        online_predictors={
            'fast-perceptron': Perceptron(alpha = 0.1, w = w_constructor()).to_categorical(),
            'slow-perceptron': Perceptron(alpha = 0.001, w = w_constructor()).to_categorical()
            },
        minibatch_size = 10,
        test_epochs = sqrtspace(0, 10, 20),
        evaluation_function='percent_correct'
        )

    assert 99 < records['SVM'].get_scores('Test') <= 100
    assert 20 < records['slow-perceptron'].get_scores('Test')[0] < 40 and 95 < records['slow-perceptron'].get_scores('Test')[-1] <= 100
    assert 20 < records['fast-perceptron'].get_scores('Test')[0] < 40 and 98 < records['fast-perceptron'].get_scores('Test')[-1] <= 100

    plot_learning_curves(records, hang = hang_plot)
Example no. 24
def compare_predictors(dataset, online_predictors={}, offline_predictors={}, minibatch_size = 'full',
        evaluation_function = 'mse', test_epochs = sqrtspace(0, 1, 10), report_test_scores = True,
        test_on = 'training+test', test_batch_size = None, accumulators = None):
    """
    Compare a set of predictors by running them on a dataset, and return the learning curves for each predictor.

    :param dataset: A DataSet object
    :param online_predictors: A dict<str:IPredictor> of online predictors.  An online predictor is
        sequentially fed minibatches of data and updates its parameters with each minibatch.
    :param offline_predictors: A dict<str:object> of offline predictors.  Offline predictors obey sklearn's
        Estimator/Predictor interfaces - i.e. they have the methods
            estimator = object.fit(data, targets) and
            prediction = object.predict(data)
    :param minibatch_size: Size of the minibatches to use for online predictors.  Can be:
        An int, in which case it represents the minibatch size for all classifiers.
        A dict<str: int>, in which case you can set the minibatch size per-classifier.
        In place of the int, you can put 'all' if you want to train on the whole dataset in each iteration.
    :param test_epochs: Test points to use for online predictors.  Can be:
        A list of integers - in which case the classifier is tested after seeing this many samples.
        A list of floats - in which case the classifier is tested after seeing this many epochs.
        'always' - In which case a test is performed after every training step
        The final test point determines the end of training.
    :param evaluation_function: Function used to evaluate output of predictors
    :param report_test_scores: Boolean indicating whether you'd like to report results online.
    :return: An OrderedDict<LearningCurveData>
    """

    all_keys = online_predictors.keys()+offline_predictors.keys()
    assert len(all_keys) > 0, 'You have to give at least one predictor.  Is that too much to ask?'
    assert len(all_keys) == len(np.unique(all_keys)), "You have multiple predictors using the same names. Change that."
    type_constructor_dict = OrderedDict(
        [(k, ('offline', offline_predictors[k])) for k in sorted(offline_predictors.keys())] +
        [(k, ('online', online_predictors[k])) for k in sorted(online_predictors.keys())]
        )

    if not isinstance(minibatch_size, dict):
        minibatch_size = {predictor_name: minibatch_size for predictor_name in online_predictors.keys()}
    else:
        assert online_predictors.viewkeys() == minibatch_size.viewkeys()

    if not isinstance(accumulators, dict):
        accumulators = {predictor_name: accumulators for predictor_name in online_predictors.keys()}
    else:
        assert online_predictors.viewkeys() == accumulators.viewkeys()

    test_epochs = np.array(test_epochs)
    # test_epochs_float = test_epochs.dtype == float
    # if test_epochs_float:
    #     test_epochs = (test_epochs * dataset.training_set.n_samples).astype(int)
    if isinstance(evaluation_function, str):
        evaluation_function = get_evaluation_function(evaluation_function)

    records = OrderedDict()

    # Run the offline predictors
    for predictor_name, (predictor_type, predictor) in type_constructor_dict.iteritems():
        print '%s\nRunning predictor %s\n%s' % ('='*20, predictor_name, '-'*20)
        records[predictor_name] = \
            assess_offline_predictor(
                predictor=predictor,
                dataset = dataset,
                evaluation_function = evaluation_function,
                report_test_scores = report_test_scores,
                test_on = test_on,
                test_batch_size = test_batch_size
                ) if predictor_type == 'offline' else \
            assess_online_predictor(
                predictor=predictor,
                dataset = dataset,
                evaluation_function = evaluation_function,
                test_epochs = test_epochs,
                accumulator = accumulators[predictor_name],
                minibatch_size = minibatch_size[predictor_name],
                report_test_scores = report_test_scores,
                test_on = test_on,
                test_batch_size = test_batch_size,
                ) if predictor_type == 'online' else \
            bad_value(predictor_type)

    print 'Done!'

    return records
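For reference, a hypothetical call showing the mix of offline (sklearn-style fit/predict) and online predictors, and the per-classifier minibatch_size dict described in the docstring. The predictor and dataset objects are placeholders in the spirit of the test examples above, not part of this function:

# Illustrative call only; 'svm' is an offline sklearn-style predictor, while the
# two perceptrons are online IPredictors, each with its own minibatch size.
records = compare_predictors(
    dataset=dataset,
    offline_predictors={'svm': SVC()},
    online_predictors={
        'fast-perceptron': Perceptron(alpha=0.1, w=w_init.copy()).to_categorical(),
        'slow-perceptron': Perceptron(alpha=0.001, w=w_init.copy()).to_categorical(),
        },
    minibatch_size={'fast-perceptron': 10, 'slow-perceptron': 20},
    test_epochs=sqrtspace(0, 10, 20),
    evaluation_function='percent_correct',
    )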
Example no. 25
def compare_predictors_old(dataset,
                           online_predictor_constructors={},
                           offline_predictor_constructors={},
                           incremental_predictor_constructors={},
                           minibatch_size=1,
                           test_points=sqrtspace(0, 1, 10),
                           evaluation_function='mse',
                           report_test_scores=True,
                           on_construction_callback=None):
    """
    DEPRECATED!  See above

    Compare a set of predictors by running them on a dataset, and return the learning curves for each predictor.

    :param dataset: A DataSet object
    :param online_predictor_constructors: A dict<str:function> of online predictors.  An online predictor is
        sequentially fed minibatches of data and updates its parameters with each minibatch.
    :param offline_predictor_constructors: A dict<str:function> of offline predictors.  An offline predictor
        trains just once on the full training data, and then makes a prediction on the test data.  Unlike
        Online, Incremental predictors, an Offline predictor has no initial state, so it doesn't make sense
        to ask it to predict before any training has been done.
    :param incremental_predictor_constructors: A dict<str:function> of incremental predictors.  An incremental
        predictor gets the whole dataset in each pass, and updates its parameters each time.
    :param minibatch_size: Size of the minibatches to use for online predictors.  Can be:
        An int, in which case it represents the minibatch size for all classifiers.
        A dict<str: int>, in which case you can set the minibatch size per-classifier.
        In place of the int, you can put 'all' if you want to train on the whole dataset in each iteration.
    :param test_points: Test points to use.  Can be:
        A list of integers - in which case the classifier is tested after seeing this many samples.
        A list of floats - in which case the classifier is tested after seeing this many epochs.
        'always' - In which case a test is performed after every training step
        The final test point determines the end of training.
    :param evaluation_function: Function used to evaluate output of predictors
    :param report_test_scores: Boolean indicating whether you'd like to report results online.
    :param on_construction_callback: A function of the form callback(predictor) that is called when a predictor
        is constructed.  This may be useful for debugging.
    :return: An OrderedDict<LearningCurveData>
    """

    all_keys = online_predictor_constructors.keys(
    ) + offline_predictor_constructors.keys(
    ) + incremental_predictor_constructors.keys()
    assert len(all_keys) > 0, 'You have to give at least one predictor.'
    assert len(all_keys) == len(
        np.unique(all_keys)
    ), "You have multiple predictors using the same names. Change that."
    type_constructor_dict = OrderedDict(
        [(k, ('offline', offline_predictor_constructors[k]))
         for k in sorted(offline_predictor_constructors.keys())] +
        [(k, ('online', online_predictor_constructors[k]))
         for k in sorted(online_predictor_constructors.keys())] +
        [(k, ('incremental', incremental_predictor_constructors[k]))
         for k in sorted(incremental_predictor_constructors.keys())])

    if isinstance(minibatch_size, int):
        minibatch_size = {
            predictor_name: minibatch_size
            for predictor_name in online_predictor_constructors.keys()
        }
    else:
        assert online_predictor_constructors.viewkeys(
        ) == minibatch_size.viewkeys()
    test_points = np.array(test_points)
    test_points_float = test_points.dtype == float
    if test_points_float:
        test_points = (test_points *
                       dataset.training_set.n_samples).astype(int)
    if isinstance(evaluation_function, str):
        evaluation_function = get_evaluation_function(evaluation_function)

    records = OrderedDict()

    # Run the offline predictors
    for predictor_name, (
            predictor_type,
            predictor_constructor) in type_constructor_dict.iteritems():
        predictor = predictor_constructor()
        if on_construction_callback is not None:
            on_construction_callback(predictor)
        print '%s\nRunning predictor %s\n%s' % ('=' * 20, predictor_name,
                                                '-' * 20)
        records[predictor_name] = \
            assess_offline_predictor(
                predictor=predictor,
                dataset = dataset,
                evaluation_function = evaluation_function,
                report_test_scores = report_test_scores
                ) if predictor_type == 'offline' else \
            assess_online_predictor_old(
                predictor=predictor,
                dataset = dataset,
                evaluation_function = evaluation_function,
                test_points = test_points,
                minibatch_size = minibatch_size[predictor_name],
                report_test_scores = report_test_scores
                ) if predictor_type == 'online' else \
            assess_incremental_predictor_old(
                predictor=predictor,
                dataset = dataset,
                evaluation_function = evaluation_function,
                sampling_points = np.sort(np.unique(np.ceil(test_points/dataset.training_set.n_samples).astype(int))),
                accumulation_function='mean',
                report_test_scores = report_test_scores
                )
    print 'Done!'

    return records
Example no. 26
def demo_mnist_mlp(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        hidden_sizes = [300],
        w_init = 0.01,
        hidden_activation = 'tanh',
        output_activation = 'softmax',
        cost = 'nll-d',
        visualize_params = False,
        n_test_points = 30,
        n_epochs = 10,
        max_training_samples = None,
        use_bias = True,
        onehot = False,
        rng = 1234,
        plot = False,
        ):
    """
    Train an MLP on MNIST and print the test scores as training progresses.
    """

    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples)

    if onehot:
        dataset = dataset.to_onehot()

    if minibatch_size == 'full':
        minibatch_size = dataset.training_set.n_samples

    optimizer = get_named_optimizer(name = optimizer, learning_rate=learning_rate)

    # Setup the training and test functions
    predictor = GradientBasedPredictor(
        function = MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size]+hidden_sizes+[10],
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init = w_init,
            use_bias=use_bias,
            rng = rng,
            ),
        cost_function=cost,
        optimizer=optimizer
        ).compile()  # .compile() turns the GradientBasedPredictor, which works with symbolic variables, into a real one that takes and returns arrays.

    def vis_callback(xx):
        p = predictor.symbolic_predictor._function
        in_layer = {
            'Layer[0].w': p.layers[0].linear_transform._w.get_value().T.reshape(-1, 28, 28),
            'Layer[0].b': p.layers[0].linear_transform._b.get_value(),
            }
        other_layers = [{'Layer[%s].w' % (i+1): l.linear_transform._w.get_value(), 'Layer[%s].b' % (i+1): l.linear_transform._b.get_value()} for i, l in enumerate(p.layers[1:])]
        dbplot(dict(in_layer.items() + sum([o.items() for o in other_layers], [])))

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size,
        test_callback=vis_callback if visualize_params else None
    )

    if plot:
        plot_learning_curves(results)
Example no. 27
def compare_predictors(dataset,
                       online_predictors={},
                       offline_predictors={},
                       minibatch_size='full',
                       evaluation_function='mse',
                       test_epochs=sqrtspace(0, 1, 10),
                       report_test_scores=True,
                       test_on='training+test',
                       test_batch_size=None,
                       accumulators=None,
                       online_test_callbacks={}):
    """
    Compare a set of predictors by running them on a dataset, and return the learning curves for each predictor.

    :param dataset: A DataSet object
    :param online_predictors: A dict<str:IPredictor> of online predictors.  An online predictor is
        sequentially fed minibatches of data and updates its parameters with each minibatch.
    :param offline_predictors: A dict<str:object> of offline predictors.  Offline predictors obey sklearn's
        Estimator/Predictor interfaces - i.e. they have the methods
            estimator = object.fit(data, targets) and
            prediction = object.predict(data)
    :param minibatch_size: Size of the minibatches to use for online predictors.  Can be:
        An int, in which case it represents the minibatch size for all classifiers.
        A dict<str: int>, in which case you can set the minibatch size per-classifier.
        In place of the int, you can put 'all' if you want to train on the whole dataset in each iteration.
    :param test_epochs: Test points to use for online predictors.  Can be:
        A list of integers - in which case the classifier is tested after seeing this many samples.
        A list of floats - in which case the classifier is tested after seeing this many epochs.
        'always' - In which case a test is performed after every training step
        The final test point determines the end of training.
    :param evaluation_function: Function used to evaluate output of predictors
    :param report_test_scores: Boolean indicating whether you'd like to report results online.
    :param test_on: 'training', 'test', 'training+test'
    :param test_batch_size: When the test set is too large to process in one step, use this to break it
        up into chunks.
    :param accumulators: A dict<str: accum_fcn>, where accum_fcn is a stateful-function of the form:
        accumulated_output = accum_fcn(this_output)
        Special case: accum_fcn can be 'avg' to make a running average.
    :param online_test_callbacks: A dict<str: fcn> where fcn is a callback that takes an online
        predictor as an argument.  Useful for logging/plotting/debugging progress during training.
    :return: An OrderedDict<LearningCurveData>
    """

    all_keys = online_predictors.keys() + offline_predictors.keys()
    assert len(
        all_keys
    ) > 0, 'You have to give at least one predictor.  Is that too much to ask?'
    assert len(all_keys) == len(
        np.unique(all_keys)
    ), "You have multiple predictors using the same names. Change that."
    type_constructor_dict = OrderedDict(
        [(k, ('offline', offline_predictors[k]))
         for k in sorted(offline_predictors.keys())] +
        [(k, ('online', online_predictors[k]))
         for k in sorted(online_predictors.keys())])

    minibatch_size = _pack_into_dict(minibatch_size,
                                     expected_keys=online_predictors.keys())
    accumulators = _pack_into_dict(accumulators,
                                   expected_keys=online_predictors.keys())
    online_test_callbacks = _pack_into_dict(
        online_test_callbacks,
        expected_keys=online_predictors.keys(),
        allow_subset=True)
    test_epochs = np.array(test_epochs)
    if isinstance(evaluation_function, str):
        evaluation_function = get_evaluation_function(evaluation_function)

    records = OrderedDict()

    # Run the offline predictors
    for predictor_name, (predictor_type,
                         predictor) in type_constructor_dict.iteritems():
        print '%s\nRunning predictor %s\n%s' % ('=' * 20, predictor_name,
                                                '-' * 20)
        records[predictor_name] = \
            assess_offline_predictor(
                predictor=predictor,
                dataset = dataset,
                evaluation_function = evaluation_function,
                report_test_scores = report_test_scores,
                test_on = test_on,
                test_batch_size = test_batch_size
                ) if predictor_type == 'offline' else \
            assess_online_predictor(
                predictor=predictor,
                dataset = dataset,
                evaluation_function = evaluation_function,
                test_epochs = test_epochs,
                accumulator = accumulators[predictor_name],
                minibatch_size = minibatch_size[predictor_name],
                report_test_scores = report_test_scores,
                test_on = test_on,
                test_batch_size = test_batch_size,
                test_callback=online_test_callbacks[predictor_name] if predictor_name in online_test_callbacks else None
                ) if predictor_type == 'online' else \
            bad_value(predictor_type)

    print 'Done!'

    return records
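The accumulators argument expects a stateful function of the form accumulated_output = accum_fcn(this_output), or the string 'avg' for a running average. Assuming the function is simply called once per test output, a minimal sketch of such an accumulator (not the library's implementation) might look like:

import numpy as np

def make_running_average_accumulator():
    # Returns a stateful accum_fcn: each call folds the new output into a
    # running average of everything seen so far (a sketch of the 'avg' case).
    state = {'n': 0, 'avg': None}
    def accum_fcn(this_output):
        state['n'] += 1
        if state['avg'] is None:
            state['avg'] = np.array(this_output, dtype=float)
        else:
            state['avg'] += (np.array(this_output, dtype=float) - state['avg']) / state['n']
        return state['avg']
    return accum_fcn

Such a function could then be passed as accumulators={'my-predictor': make_running_average_accumulator()} alongside an online predictor of the same name (the key 'my-predictor' is hypothetical).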
Example no. 28
def demo_mnist_mlp(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        hidden_sizes = [300],
        w_init = 0.01,
        hidden_activation = 'tanh',
        output_activation = 'softmax',
        cost = 'nll-d',
        visualize_params = False,
        n_test_points = 30,
        n_epochs = 10,
        max_training_samples = None,
        use_bias = True,
        onehot = False,
        rng = 1234,
        plot = False,
        ):
    """
    Train an MLP on MNIST and print the test scores as training progresses.
    """

    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples)

    if onehot:
        dataset = dataset.to_onehot()

    if minibatch_size == 'full':
        minibatch_size = dataset.training_set.n_samples

    optimizer = get_named_optimizer(name = optimizer, learning_rate=learning_rate)

    # Setup the training and test functions
    predictor = GradientBasedPredictor(
        function = MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size]+hidden_sizes+[10],
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init = w_init,
            use_bias=use_bias,
            rng = rng,
            ),
        cost_function=cost,
        optimizer=optimizer
        ).compile()  # .compile() turns the GradientBasedPredictor, which works with symbolic variables, into a real one that takes and returns arrays.

    def vis_callback(xx):
        p = predictor.symbolic_predictor._function
        in_layer = {
            'Layer[0].w': p.layers[0].linear_transform._w.get_value().T.reshape(-1, 28, 28),
            'Layer[0].b': p.layers[0].linear_transform._b.get_value(),
            }
        other_layers = [{'Layer[%s].w' % (i+1): l.linear_transform._w.get_value(), 'Layer[%s].b' % (i+1): l.linear_transform._b.get_value()} for i, l in enumerate(p.layers[1:])]
        dbplot(dict(in_layer.items() + sum([o.items() for o in other_layers], [])))

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size,
        test_callback=vis_callback if visualize_params else None
    )

    if plot:
        plot_learning_curves(results)
Example no. 29
def compare_predictors_old(dataset, online_predictor_constructors = {}, offline_predictor_constructors = {},
        incremental_predictor_constructors = {}, minibatch_size = 1, test_points = sqrtspace(0, 1, 10),
        evaluation_function = 'mse', report_test_scores = True, on_construction_callback = None):
    """
    DEPRECATED!  See above

    Compare a set of predictors by running them on a dataset, and return the learning curves for each predictor.

    :param dataset: A DataSet object
    :param online_predictor_constructors: A dict<str:function> of online predictors.  An online predictor is
        sequentially fed minibatches of data and updates its parameters with each minibatch.
    :param offline_predictor_constructors: A dict<str:function> of offline predictors.  An offline predictor
        trains just once on the full training data, and then makes a prediction on the test data.  Unlike
        Online, Incremental predictors, an Offline predictor has no initial state, so it doesn't make sense
        to ask it to predict before any training has been done.
    :param incremental_predictor_constructors: A dict<str:function> of incremental predictors.  An incremental
        predictor gets the whole dataset in each pass, and updates its parameters each time.
    :param minibatch_size: Size of the minibatches to use for online predictors.  Can be:
        An int, in which case it represents the minibatch size for all classifiers.
        A dict<str: int>, in which case you can set the minibatch size per-classifier.
        In place of the int, you can put 'all' if you want to train on the whole dataset in each iteration.
    :param test_points: Test points to use.  Can be:
        A list of integers - in which case the classifier is tested after seeing this many samples.
        A list of floats - in which case the classifier is tested after seeing this many epochs.
        'always' - In which case a test is performed after every training step
        The final test point determines the end of training.
    :param evaluation_function: Function used to evaluate output of predictors
    :param report_test_scores: Boolean indicating whether you'd like to report results online.
    :param on_construction_callback: A function of the form callback(predictor) that is called when a predictor
        is constructed.  This may be useful for debugging.
    :return: An OrderedDict<LearningCurveData>
    """

    all_keys = online_predictor_constructors.keys()+offline_predictor_constructors.keys()+incremental_predictor_constructors.keys()
    assert len(all_keys) > 0, 'You have to give at least one predictor.'
    assert len(all_keys) == len(np.unique(all_keys)), "You have multiple predictors using the same names. Change that."
    type_constructor_dict = OrderedDict(
        [(k, ('offline', offline_predictor_constructors[k])) for k in sorted(offline_predictor_constructors.keys())] +
        [(k, ('online', online_predictor_constructors[k])) for k in sorted(online_predictor_constructors.keys())] +
        [(k, ('incremental', incremental_predictor_constructors[k])) for k in sorted(incremental_predictor_constructors.keys())]
        )

    if isinstance(minibatch_size, int):
        minibatch_size = {predictor_name: minibatch_size for predictor_name in online_predictor_constructors.keys()}
    else:
        assert online_predictor_constructors.viewkeys() == minibatch_size.viewkeys()
    test_points = np.array(test_points)
    test_points_float = test_points.dtype == float
    if test_points_float:
        test_points = (test_points * dataset.training_set.n_samples).astype(int)
    if isinstance(evaluation_function, str):
        evaluation_function = get_evaluation_function(evaluation_function)

    records = OrderedDict()

    # Run the offline predictors
    for predictor_name, (predictor_type, predictor_constructor) in type_constructor_dict.iteritems():
        predictor = predictor_constructor()
        if on_construction_callback is not None:
            on_construction_callback(predictor)
        print '%s\nRunning predictor %s\n%s' % ('='*20, predictor_name, '-'*20)
        records[predictor_name] = \
            assess_offline_predictor(
                predictor=predictor,
                dataset = dataset,
                evaluation_function = evaluation_function,
                report_test_scores = report_test_scores
                ) if predictor_type == 'offline' else \
            assess_online_predictor_old(
                predictor=predictor,
                dataset = dataset,
                evaluation_function = evaluation_function,
                test_points = test_points,
                minibatch_size = minibatch_size[predictor_name],
                report_test_scores = report_test_scores
                ) if predictor_type == 'online' else \
            assess_incremental_predictor_old(
                predictor=predictor,
                dataset = dataset,
                evaluation_function = evaluation_function,
                sampling_points = np.sort(np.unique(np.ceil(test_points/dataset.training_set.n_samples).astype(int))),
                accumulation_function='mean',
                report_test_scores = report_test_scores
                )
    print 'Done!'

    return records