Example #1
def demo_gan_mnist(n_epochs=20,
                   minibatch_size=20,
                   n_discriminator_steps=1,
                   noise_dim=10,
                   plot_period=100,
                   rng=1234):
    """
    Train a Generative Adversarial network on MNIST data, showing generated samples as training progresses.

    :param n_epochs: Number of epochs to train
    :param minibatch_size: Size of minibatch to feed in each training iteration
    :param n_discriminator_steps: Number of steps training discriminator for every step of training generator
    :param noise_dim: Dimensionality of latent space (from which random samples are pulled)
    :param plot_period: Plot every N training iterations
    :param rng: Random number generator or seed
    """

    net = GenerativeAdversarialNetwork(
        discriminator=MultiLayerPerceptron.from_init(w_init=0.01,
                                                     layer_sizes=[784, 100, 1],
                                                     hidden_activation='relu',
                                                     output_activation='sig',
                                                     rng=rng),
        generator=MultiLayerPerceptron.from_init(
            w_init=0.1,
            layer_sizes=[noise_dim, 200, 784],
            hidden_activation='relu',
            output_activation='sig',
            rng=rng),
        noise_dim=noise_dim,
        optimizer=AdaMax(0.001),
        rng=rng)

    data = get_mnist_dataset(flat=True).training_set.input

    f_train_discriminator = net.train_discriminator.compile()
    f_train_generator = net.train_generator.compile()
    f_generate = net.generate.compile()

    for i, minibatch in enumerate(
            minibatch_iterate(data,
                              n_epochs=n_epochs,
                              minibatch_size=minibatch_size)):
        f_train_discriminator(minibatch)
        print 'Trained Discriminator'
        if i % n_discriminator_steps == n_discriminator_steps - 1:
            f_train_generator(n_samples=minibatch_size)
            print 'Trained Generator'
        if i % plot_period == 0:
            samples = f_generate(n_samples=minibatch_size)
            dbplot(minibatch.reshape(-1, 28, 28), "Real")
            dbplot(samples.reshape(-1, 28, 28), "Counterfeit")
            print 'Disp'
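
The training loop above leans on `minibatch_iterate` to cycle through the data for a given number of epochs. A minimal sketch of what such an iterator might do, assuming it simply yields successive slices of the array (the real Plato utility may shuffle or handle wrap-around differently):

import numpy as np

def minibatch_iterate_sketch(data, minibatch_size, n_epochs):
    # Hypothetical stand-in for minibatch_iterate: yield consecutive
    # slices of `data`, looping until n_epochs passes are complete.
    n_samples = len(data)
    n_iterations = int(n_epochs * n_samples / minibatch_size)
    for i in range(n_iterations):
        start = (i * minibatch_size) % n_samples
        yield data[start:start + minibatch_size]
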
Example #2
def mnist_adamax_showdown(hidden_size=300, n_epochs=10, n_tests=20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(layer_sizes=[
            dataset.input_size, hidden_size, dataset.n_categories
        ],
                                                hidden_activation='sig',
                                                output_activation='lin',
                                                w_init=0.01,
                                                rng=5),
        cost_function=softmax_negative_log_likelihood,
        optimizer=optimizer,
    ).compile()

    return compare_predictors(dataset=dataset,
                              online_predictors={
                                  'sgd':
                                  make_mlp(SimpleGradientDescent(eta=0.1)),
                                  'adamax': make_mlp(AdaMax(alpha=1e-3)),
                              },
                              minibatch_size=20,
                              test_epochs=sqrtspace(0, n_epochs, n_tests),
                              evaluation_function=percent_argmax_correct)
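
`sqrtspace(0, n_epochs, n_tests)` supplies the epochs at which the predictors are evaluated. A plausible (assumed) implementation spaces the checkpoints linearly in square-root space, so tests cluster early in training:

import numpy as np

def sqrtspace_sketch(start, stop, n_points):
    # Assumed behaviour: evenly spaced points in sqrt-space, squared back.
    return np.linspace(np.sqrt(start), np.sqrt(stop), n_points) ** 2

print(sqrtspace_sketch(0, 10, 5))  # -> [0., 0.625, 2.5, 5.625, 10.]
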
Example #3
def test_symbolic_predictors():
    """
    This test is meant to serve as both a test and a tutorial for how to use a symbolic predictor.
    It shows how to construct a symbolic predictor using a function, cost function, and optimizer.
    It then trains this predictor on a synthetic toy dataset and demonstrates that it has learned.
    """

    dataset = get_synthetic_clusters_dataset()

    symbolic_predictor = GradientBasedPredictor(
        function = MultiLayerPerceptron.from_init(
            layer_sizes = [dataset.input_size, 100, dataset.n_categories],
            output_activation='softmax',
            w_init = 0.1,
            rng = 3252
            ),
        cost_function=negative_log_likelihood_dangerous,
        optimizer=SimpleGradientDescent(eta = 0.1),
        )

    predictor = symbolic_predictor.compile()
    # .compile() turns the symbolic predictor into an IPredictor object, which can be called with numpy arrays.

    init_score = percent_argmax_correct(predictor.predict(dataset.test_set.input), dataset.test_set.target)
    for x_m, y_m in zip_minibatch_iterate([dataset.training_set.input, dataset.training_set.target], minibatch_size=10, n_epochs=20):
        predictor.train(x_m, y_m)
    final_score = percent_argmax_correct(predictor.predict(dataset.test_set.input), dataset.test_set.target)

    print 'Initial score: %s%%.  Final score: %s%%' % (init_score, final_score)
    assert init_score < 30
    assert final_score > 98
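
`percent_argmax_correct`, used for scoring here and in the other examples, presumably compares the argmax of each prediction row against the target label. A numpy sketch of that assumed behaviour:

import numpy as np

def percent_argmax_correct_sketch(predictions, targets):
    # Assumed behaviour: argmax over the class axis, compared against
    # integer (or one-hot) targets, reported as a percentage.
    guesses = np.argmax(predictions, axis=1)
    if targets.ndim == 2:  # one-hot targets
        targets = np.argmax(targets, axis=1)
    return 100.0 * np.mean(guesses == targets)
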
Example #4
def mnist_adamax_showdown(hidden_size = 300, n_epochs = 10, n_tests = 20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
            function = MultiLayerPerceptron.from_init(
                layer_sizes=[dataset.input_size, hidden_size, dataset.n_categories],
                hidden_activation='sig',
                output_activation='lin',
                w_init = 0.01,
                rng = 5
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = optimizer,
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'sgd': make_mlp(SimpleGradientDescent(eta = 0.1)),
            'adamax': make_mlp(AdaMax(alpha = 1e-3)),
            },
        minibatch_size = 20,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
Example #5
def test_predictor_pickling():

    dataset = get_synthetic_clusters_dataset()

    predictor_constructor = lambda: GradientBasedPredictor(
        function = MultiLayerPerceptron.from_init(
            layer_sizes = [dataset.input_shape[0], 100, dataset.n_categories],
            output_activation='softmax',
            w_init = lambda n_in, n_out, rng = np.random.RandomState(3252): 0.1*rng.randn(n_in, n_out)
            ),
        cost_function=negative_log_likelihood_dangerous,
        optimizer=SimpleGradientDescent(eta = 0.1),
        ).compile()

    evaluate = lambda pred: evaluate_predictor(pred, dataset.test_set, percent_argmax_correct)

    # Train up predictor and save params
    predictor = predictor_constructor()
    pre_training_score = evaluate(predictor)
    assert pre_training_score < 35
    train_online_predictor(predictor, dataset.training_set, minibatch_size=20, n_epochs=3)
    post_training_score = evaluate(predictor)
    assert post_training_score > 95

    with pytest.raises(PicklingError):
        # TODO: Fix the PicklingError
        trained_predictor_string = pickle.dumps(predictor)

        # Instantiate new predictor and load params
        new_predictor = pickle.loads(trained_predictor_string)
        loaded_score = evaluate(new_predictor)
        assert loaded_score == post_training_score > 95
Example #6
def compare_example_predictors(
    n_epochs=5,
    n_tests=20,
    minibatch_size=10,
):
    """
    This demo shows how we can compare different online predictors.  The demo trains each predictor on the dataset,
    returning an object that contains the results.

    :param test_mode: Set this to True to just run the demo quickly (but not to completion) to see that it doesn't break.
    """

    dataset = get_mnist_dataset(flat=True)
    # "Flatten" the 28x28 inputs to a 784-d vector

    if is_test_mode():
        # Shorten the dataset so we run through it quickly in test mode.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 3

    # Here we compare three predictors on MNIST - an MLP, a Perceptron, and a Random Forest.
    # - The MLP is defined using Plato's interfaces - we create a Symbolic Predictor (GradientBasedPredictor) and
    #   then compile it into an IPredictor object
    # - The Perceptron directly implements the IPredictor interface.
    # - The Random Forest implements scikit-learn's predictor interface - that is, it has a fit(x, y) and a predict(x) method.
    learning_curve_data = compare_predictors(
        dataset=dataset,
        online_predictors={
            'Perceptron':
            Perceptron(w=np.zeros((dataset.input_size, dataset.n_categories)),
                       alpha=0.001).
            to_categorical(
                n_categories=dataset.n_categories
            ),  # .to_categorical allows the perceptron to be trained on integer labels.
            'MLP':
            GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[
                        dataset.input_size, 500, dataset.n_categories
                    ],
                    hidden_activation='sig',  # Sigmoidal hidden units
                    output_activation=
                    'softmax',  # Softmax output unit, since we're doing multinomial classification
                    w_init=0.01,
                    rng=5),
                cost_function=
                negative_log_likelihood_dangerous,  # "Dangerous" because it doesn't check to see that output is normalized, but we know it is because it comes from softmax.
                optimizer=SimpleGradientDescent(eta=0.1),
            ).compile(),  # .compile() returns an IPredictor
        },
        offline_predictors={'RF': RandomForestClassifier(n_estimators=40)},
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct  # Compares one-hot
    )
    # Results is a LearningCurveData object
    return learning_curve_data
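
The comment block above distinguishes online predictors (Plato's IPredictor interface, trained minibatch by minibatch) from offline predictors (scikit-learn style, fit once on the whole training set). The class names below are illustrative stubs, not actual Plato classes; they only show the two method signatures `compare_predictors` expects:

class OnlinePredictorStub(object):
    """Illustrative IPredictor-style interface: trained incrementally."""
    def train(self, x, y):
        pass            # update parameters from one (x, y) minibatch
    def predict(self, x):
        raise NotImplementedError

class OfflinePredictorStub(object):
    """Illustrative scikit-learn-style interface: fit once, then predict."""
    def fit(self, x, y):
        pass
    def predict(self, x):
        raise NotImplementedError
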
Example #7
def compare_example_predictors(
        n_epochs = 5,
        n_tests = 20,
        minibatch_size = 10,
    ):
    """
    This demo shows how we can compare different online predictors.  The demo trains each predictor on the dataset,
    returning an object that contains the results.

    :param test_mode: Set this to True to just run the demo quickly (but not to completion) to see that it doesn't break.
    """

    dataset = get_mnist_dataset(flat = True)
    # "Flatten" the 28x28 inputs to a 784-d vector

    if is_test_mode():
        # Shorten the dataset so we run through it quickly in test mode.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 3

    # Here we compare three predictors on MNIST - an MLP, a Perceptron, and a Random Forest.
    # - The MLP is defined using Plato's interfaces - we create a Symbolic Predictor (GradientBasedPredictor) and
    #   then compile it into an IPredictor object
    # - The Perceptron directly implements the IPredictor interface.
    # - The Random Forest implements scikit-learn's predictor interface - that is, it has a fit(x, y) and a predict(x) method.
    learning_curve_data = compare_predictors(
        dataset = dataset,
        online_predictors = {
            'Perceptron': Perceptron(
                w = np.zeros((dataset.input_size, dataset.n_categories)),
                alpha = 0.001
                ).to_categorical(n_categories = dataset.n_categories),  # .to_categorical allows the perceptron to be trained on integer labels.
            'MLP': GradientBasedPredictor(
                function = MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size, 500, dataset.n_categories],
                    hidden_activation='sig',  # Sigmoidal hidden units
                    output_activation='softmax',  # Softmax output unit, since we're doing multinomial classification
                    w_init = 0.01,
                    rng = 5
                ),
                cost_function = negative_log_likelihood_dangerous,  # "Dangerous" because it doesn't check to see that output is normalized, but we know it is because it comes from softmax.
                optimizer = SimpleGradientDescent(eta = 0.1),
                ).compile(),  # .compile() returns an IPredictor
            },
        offline_predictors={
            'RF': RandomForestClassifier(n_estimators = 40)
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct  # Compares one-hot
        )
    # Results is a LearningCurveData object
    return learning_curve_data
Example #8
def mlp_normalization(hidden_size=300,
                      n_epochs=30,
                      n_tests=50,
                      minibatch_size=20):
    """
    Compare mlp with different schemes for normalizing input.

    regular: Regular vanilla MLP
    normalize: Mean-subtract/normalize over minibatch
    normalize and scale: Mean-subtract/normalize over minibatch AND multiply by a trainable
        (per-unit) scale parameter.

    Conclusions: No significant benefit to scale parameter.  Normalizing gives
    a head start but incurs a small cost later on.  But really all classifiers are quite similar.

    :param hidden_size: Size of hidden layer
    """
    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda normalize, scale: GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(layer_sizes=[
            dataset.input_size, hidden_size, dataset.n_categories
        ],
                                                hidden_activation='sig',
                                                output_activation='lin',
                                                normalize_minibatch=normalize,
                                                scale_param=scale,
                                                w_init=0.01,
                                                rng=5),
        cost_function=softmax_negative_log_likelihood,
        optimizer=SimpleGradientDescent(eta=0.1),
    ).compile()

    return compare_predictors(dataset=dataset,
                              online_predictors={
                                  'regular':
                                  make_mlp(normalize=False, scale=False),
                                  'normalize':
                                  make_mlp(normalize=True, scale=False),
                                  'normalize and scale':
                                  make_mlp(normalize=True, scale=True),
                              },
                              minibatch_size=minibatch_size,
                              test_epochs=sqrtspace(0, n_epochs, n_tests),
                              evaluation_function=percent_argmax_correct)
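
In numpy terms, the `normalize_minibatch` / `scale_param` options described in the docstring amount to something like the following per-minibatch transformation (a sketch of the assumed behaviour, not the actual Theano code):

import numpy as np

def normalize_minibatch_sketch(x, scale=None, eps=1e-7):
    # Mean-subtract and variance-normalize each unit over the minibatch,
    # then optionally multiply by a trainable per-unit scale parameter.
    x = (x - x.mean(axis=0)) / (x.std(axis=0) + eps)
    if scale is not None:
        x = x * scale  # scale has shape (n_units,)
    return x
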
Example #9
def test_mlp():

    assert_online_predictor_not_broken(
        predictor_constructor=lambda n_dim_in, n_dim_out:
        GradientBasedPredictor(
            function=MultiLayerPerceptron.from_init(
                layer_sizes=[n_dim_in, 100, n_dim_out],
                output_activation='softmax',
                w_init=0.1,
                rng=3252),
            cost_function=negative_log_likelihood_dangerous,
            optimizer=SimpleGradientDescent(eta=0.1),
        ).compile(),
        categorical_target=True,
        minibatch_size=10,
        n_epochs=2)
Example #10
def mlp_normalization(hidden_size = 300, n_epochs = 30, n_tests = 50, minibatch_size=20):
    """
    Compare mlp with different schemes for normalizing input.

    regular: Regular vanilla MLP
    normalize: Mean-subtract/normalize over minibatch
    normalize and scale: Mean-subtract/normalize over minibatch AND multiply by a trainable
        (per-unit) scale parameter.

    Conclusions: No significant benefit to scale parameter.  Normalizing gives
    a head start but incurs a small cost later on.  But really all classifiers are quite similar.

    :param hidden_size: Size of hidden layer
    """
    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda normalize, scale: GradientBasedPredictor(
            function = MultiLayerPerceptron.from_init(
                layer_sizes=[dataset.input_size, hidden_size, dataset.n_categories],
                hidden_activation='sig',
                output_activation='lin',
                normalize_minibatch=normalize,
                scale_param=scale,
                w_init = 0.01,
                rng = 5
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = SimpleGradientDescent(eta = 0.1),
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'regular': make_mlp(normalize = False, scale = False),
            'normalize': make_mlp(normalize=True, scale = False),
            'normalize and scale': make_mlp(normalize=True, scale = True),
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
Example #11
def train_conventional_mlp_on_mnist(hidden_sizes,
                                    n_epochs=50,
                                    w_init='xavier-both',
                                    minibatch_size=20,
                                    rng=1234,
                                    optimizer='sgd',
                                    hidden_activations='relu',
                                    output_activation='softmax',
                                    learning_rate=0.01,
                                    cost_function='nll',
                                    use_bias=True,
                                    l1_loss=0,
                                    l2_loss=0,
                                    test_on='training+test'):

    dataset = get_mnist_dataset(flat=True)

    if output_activation != 'softmax':
        dataset = dataset.to_onehot()

    all_layer_sizes = [dataset.input_size
                       ] + hidden_sizes + [dataset.n_categories]
    weights = initialize_network_params(layer_sizes=all_layer_sizes,
                                        mag=w_init,
                                        base_dist='normal',
                                        include_biases=False,
                                        rng=rng)
    net = MultiLayerPerceptron(weights=weights,
                               hidden_activation=hidden_activations,
                               output_activation=output_activation,
                               use_bias=use_bias)
    predictor = GradientBasedPredictor(
        function=net,
        cost_function=get_named_cost_function(cost_function),
        optimizer=get_named_optimizer(optimizer, learning_rate=learning_rate),
        regularization_cost=lambda params: sum(l1_loss * abs(p_).sum(
        ) + l2_loss * (p_**2).sum() if p_.ndim == 2 else 0
                                               for p_ in params)).compile()
    assess_online_predictor(predictor=predictor,
                            dataset=dataset,
                            evaluation_function='percent_argmax_correct',
                            test_epochs=range(0, n_epochs, 1),
                            test_on=test_on,
                            minibatch_size=minibatch_size)
    ws = [p.get_value() for p in net.parameters]
    return ws
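
The `regularization_cost` argument above adds an L1 and an L2 penalty over the weight matrices only; the `p_.ndim == 2` check skips 1-d bias vectors. The same computation in plain numpy, as a sketch:

import numpy as np

def regularization_cost_sketch(params, l1_loss=0.0, l2_loss=0.0):
    # Penalize only 2-d parameters (weight matrices), mirroring the
    # ndim == 2 check in the example above.
    return sum(l1_loss * np.abs(p).sum() + l2_loss * (p ** 2).sum()
               for p in params if p.ndim == 2)
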
Example #12
def backprop_vs_difference_target_prop(hidden_sizes=[240],
                                       n_epochs=10,
                                       minibatch_size=20,
                                       n_tests=20):

    dataset = get_mnist_dataset(flat=True)
    dataset = dataset.process_with(
        targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    return compare_predictors(
        dataset=dataset,
        online_predictors={
            'backprop-mlp':
            GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[dataset.input_size] + hidden_sizes +
                    [dataset.n_categories],
                    hidden_activation='tanh',
                    output_activation='sig',
                    w_init=0.01,
                    rng=5),
                cost_function=mean_squared_error,
                optimizer=AdaMax(0.01),
            ).compile(),
            'difference-target-prop-mlp':
            DifferenceTargetMLP.from_initializer(
                input_size=dataset.input_size,
                output_size=dataset.target_size,
                hidden_sizes=hidden_sizes,
                optimizer_constructor=lambda: AdaMax(0.01),
                w_init=0.01,
                noise=1,
            ).compile()
        },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
    )
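
The `targets_processor` above converts integer labels to one-hot vectors via `OneHotEncoding(10)`. A minimal numpy sketch of that assumed encoding:

import numpy as np

def one_hot_sketch(labels, n_categories=10):
    # Assumed behaviour of OneHotEncoding(10): map each integer label to
    # the corresponding row of the identity matrix.
    return np.eye(n_categories, dtype=int)[labels]

print(one_hot_sketch(np.array([0, 2, 9])))  # rows with a single 1 at positions 0, 2 and 9
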
Example #13
File: test_mlp.py  Project: qyx268/plato
def test_mlp():

    assert_online_predictor_not_broken(
        predictor_constructor = lambda n_dim_in, n_dim_out:
            GradientBasedPredictor(
                function = MultiLayerPerceptron.from_init(
                    layer_sizes = [n_dim_in, 100, n_dim_out],
                    output_activation='softmax',
                    w_init = 0.1,
                    rng = 3252
                    ),
                cost_function=negative_log_likelihood_dangerous,
                optimizer=SimpleGradientDescent(eta = 0.1),
                ).compile(),
        categorical_target=True,
        minibatch_size=10,
        n_epochs=2
        )
Example #14
def backprop_vs_difference_target_prop(
        hidden_sizes = [240],
        n_epochs = 10,
        minibatch_size = 20,
        n_tests = 20
        ):

    dataset = get_mnist_dataset(flat = True)
    dataset = dataset.process_with(targets_processor=lambda (x, ): (OneHotEncoding(10)(x).astype(int), ))

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    set_default_figure_size(12, 9)

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'backprop-mlp': GradientBasedPredictor(
                function = MultiLayerPerceptron.from_init(
                layer_sizes=[dataset.input_size]+hidden_sizes+[dataset.n_categories],
                    hidden_activation='tanh',
                    output_activation='sig',
                    w_init = 0.01,
                    rng = 5
                    ),
                cost_function = mean_squared_error,
                optimizer = AdaMax(0.01),
                ).compile(),
            'difference-target-prop-mlp': DifferenceTargetMLP.from_initializer(
                input_size = dataset.input_size,
                output_size = dataset.target_size,
                hidden_sizes = hidden_sizes,
                optimizer_constructor = lambda: AdaMax(0.01),
                w_init=0.01,
                noise = 1,
            ).compile()
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct,
        )
Example #15
def test_bare_bones_mlp(seed=1234):
    """
    This verifies that the MLP works.  It's intentionally not using any wrappers on top of MLP to show its "bare bones"
    usage.  Wrapping in GradientBasedPredictor can simplify usage - see test_symbolic_predictors.
    """

    dataset = get_synthetic_clusters_dataset()

    mlp = MultiLayerPerceptron.from_init(
        layer_sizes=[dataset.input_size, 20, dataset.n_categories],
        hidden_activation='relu',
        output_activation='softmax',
        w_init=0.01,
        rng=seed)

    fwd_fcn = mlp.compile()

    optimizer = SimpleGradientDescent(eta=0.1)

    @symbolic_updater
    def train(x, y):
        output = mlp(x)
        cost = negative_log_likelihood_dangerous(output, y)
        optimizer(cost, mlp.parameters)

    train_fcn = train.compile()

    init_score = percent_argmax_correct(fwd_fcn(dataset.test_set.input),
                                        dataset.test_set.target)

    for x_m, y_m in zip_minibatch_iterate(
        [dataset.training_set.input, dataset.training_set.target],
            minibatch_size=10,
            n_epochs=20):
        train_fcn(x_m, y_m)

    final_score = percent_argmax_correct(fwd_fcn(dataset.test_set.input),
                                         dataset.test_set.target)
    print 'Initial score: %s%%.  Final score: %s%%' % (init_score, final_score)
    assert init_score < 30
    assert final_score > 98
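
Conceptually, the compiled `fwd_fcn` above computes a ReLU hidden layer followed by a softmax output. A plain-numpy sketch of that forward pass, assuming hypothetical weight matrices `w0`, `w1` and bias vectors `b0`, `b1`:

import numpy as np

def mlp_forward_sketch(x, w0, b0, w1, b1):
    # Conceptual forward pass of the [input_size, 20, n_categories] MLP:
    h = np.maximum(0, x.dot(w0) + b0)              # hidden_activation='relu'
    logits = h.dot(w1) + b1
    e = np.exp(logits - logits.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)        # output_activation='softmax'
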
Example #16
File: test_mlp.py  Project: qyx268/plato
def test_bare_bones_mlp(seed = 1234):
    """
    This verifies that the MLP works.  It's intentionally not using any wrappers on top of MLP to show its "bare bones"
    usage.  Wrapping in GradientBasedPredictor can simplify usage - see test_symbolic_predictors.
    """

    dataset = get_synthetic_clusters_dataset()

    mlp = MultiLayerPerceptron.from_init(
        layer_sizes = [dataset.input_size, 20, dataset.n_categories],
        hidden_activation = 'relu',
        output_activation = 'softmax',
        w_init = 0.01,
        rng = seed
        )

    fwd_fcn = mlp.compile()

    optimizer = SimpleGradientDescent(eta = 0.1)

    @symbolic_updater
    def train(x, y):
        output = mlp(x)
        cost = negative_log_likelihood_dangerous(output, y)
        optimizer(cost, mlp.parameters)

    train_fcn = train.compile()

    init_score = percent_argmax_correct(fwd_fcn(dataset.test_set.input), dataset.test_set.target)

    for x_m, y_m in zip_minibatch_iterate([dataset.training_set.input, dataset.training_set.target], minibatch_size=10, n_epochs=20):
        train_fcn(x_m, y_m)

    final_score = percent_argmax_correct(fwd_fcn(dataset.test_set.input), dataset.test_set.target)
    print 'Initial score: %s%%.  Final score: %s%%' % (init_score, final_score)
    assert init_score < 30
    assert final_score > 98
Example #17
def get_predictor(predictor_type, input_size, target_size, hidden_sizes = [240], output_activation = 'sigm',
        hidden_activation = 'tanh', optimizer = 'adamax', learning_rate = 0.01, noise = 1, w_init=0.01, rng = None):
    """
    Specify parameters that will allow you to construct a predictor

    :param predictor_type: String identifying the predictor class (see below)
    :param input_size: Integer size of the input vector
    :param target_size: Integer size of the target vector
    :param hidden_sizes:
    :param input_activation:
    :param hidden_activation:
    :param optimizer:
    :param learning_rate:
    :return:
    """
    return {
        'MLP': lambda: GradientBasedPredictor(
            function = MultiLayerPerceptron.from_init(
                layer_sizes = [input_size] + hidden_sizes + [target_size],
                hidden_activation=hidden_activation,
                output_activation=output_activation,
                w_init = w_init,
                rng = rng
                ),
            cost_function = mean_squared_error,
            optimizer = get_named_optimizer(optimizer, learning_rate),
            ).compile(),
        'DTP': lambda: DifferenceTargetMLP.from_initializer(
            input_size = input_size,
            output_size = target_size,
            hidden_sizes = hidden_sizes,
            optimizer_constructor = lambda: get_named_optimizer(optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init_mag=w_init,
            noise = noise,
            rng = rng,
            ).compile(),
        'PreAct-DTP': lambda: DifferenceTargetMLP.from_initializer(
            input_size = input_size,
            output_size = target_size,
            hidden_sizes = hidden_sizes,
            optimizer_constructor = lambda: get_named_optimizer(optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init_mag=w_init,
            noise = noise,
            layer_constructor = PreActivationDifferenceTargetLayer.from_initializer,
            rng = rng,
            ).compile(),
        'Linear-DTP': lambda: LinearDifferenceTargetMLP.from_initializer(
            input_size = input_size,
            output_size = target_size,
            hidden_sizes = hidden_sizes,
            optimizer_constructor = lambda: get_named_optimizer(optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation='linear',
            w_init_mag=w_init,
            noise = noise,
            rng = rng,
            # layer_constructor = LinearDifferenceTargetLayer.from_initializer
            ).compile(),
        }[predictor_type]()
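
`get_predictor` dispatches on a string key to one of several lazily constructed, compiled predictors. A hypothetical usage sketch (the sizes below are arbitrary):

# Hypothetical usage of the dispatcher above; sizes are made up.
predictor = get_predictor('DTP', input_size=784, target_size=10,
                          hidden_sizes=[240], optimizer='adamax',
                          learning_rate=0.01, rng=1234)
# `predictor` is a compiled IPredictor with .train(x, y) and .predict(x).
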
Example #18
def compare_spiking_to_nonspiking(hidden_sizes = [300, 300], eta=0.01, w_init=0.01, fractional = False, n_epochs = 20,
                                  forward_discretize = 'rect-herding', back_discretize = 'noreset-herding', test_discretize='rect-herding', save_results = False):

    mnist = get_mnist_dataset(flat=True).to_onehot()
    test_epochs=[0.0, 0.05, 0.1, 0.2, 0.5]+range(1, n_epochs+1)

    if is_test_mode():
        mnist = mnist.shorten(500)
        eta = 0.01
        w_init=0.01
        test_epochs = [0.0, 0.05, 0.1]

    spiking_net = JavaSpikingNetWrapper.from_init(
        fractional = fractional,
        depth_first=False,
        smooth_grads = False,
        forward_discretize = forward_discretize,
        back_discretize = back_discretize,
        test_discretize = test_discretize,
        w_init=w_init,
        hold_error=True,
        rng = 1234,
        n_steps = 10,
        eta=eta,
        layer_sizes=[784]+hidden_sizes+[10],
        )

    relu_net = GradientBasedPredictor(
        MultiLayerPerceptron.from_init(
            hidden_activation = 'relu',
            output_activation = 'relu',
            layer_sizes=[784]+hidden_sizes+[10],
            use_bias=False,
            w_init=w_init,
            rng=1234,
            ),
        cost_function = 'mse',
        optimizer=GradientDescent(eta)
        ).compile()

    # Listen for spikes
    forward_eavesdropper = jp.JClass('nl.uva.deepspike.eavesdroppers.SpikeCountingEavesdropper')()
    backward_eavesdropper = jp.JClass('nl.uva.deepspike.eavesdroppers.SpikeCountingEavesdropper')()
    for lay in spiking_net.jnet.layers:
        lay.forward_herder.add_eavesdropper(forward_eavesdropper)
    for lay in spiking_net.jnet.layers[1:]:
        lay.backward_herder.add_eavesdropper(backward_eavesdropper)
    spiking_net.jnet.error_counter.add_eavesdropper(backward_eavesdropper)
    forward_counts = []
    backward_counts = []

    def register_counts():
        forward_counts.append(forward_eavesdropper.get_count())
        backward_counts.append(backward_eavesdropper.get_count())

    results = compare_predictors(
        dataset=mnist,
        online_predictors={
            'Spiking-MLP': spiking_net,
            'ReLU-MLP': relu_net,
            },
        test_epochs=test_epochs,
        online_test_callbacks=lambda p: register_counts() if p is spiking_net else None,
        minibatch_size = 1,
        test_on = 'training+test',
        evaluation_function=percent_argmax_incorrect,
        )

    spiking_params = [np.array(lay.forward_weights.w.asFloat()).copy() for lay in spiking_net.jnet.layers]
    relu_params = [param.get_value().astype(np.float64) for param in relu_net.parameters]

    # See what the score is when we plug the ReLU-trained weights into the spiking net.
    offline_trained_spiking_net = JavaSpikingNetWrapper(
        ws=relu_params,
        fractional = fractional,
        depth_first=False,
        smooth_grads = False,
        forward_discretize = forward_discretize,
        back_discretize = back_discretize,
        test_discretize = test_discretize,
        hold_error=True,
        n_steps = 10,
        eta=eta,
        )

    # for spiking_layer, p in zip(spiking_net.jnet.layers, relu_params):
    #     spiking_layer.w = p.astype(np.float64)

    error = [
        ('Test', percent_argmax_incorrect(offline_trained_spiking_net.predict(mnist.test_set.input), mnist.test_set.target)),
        ('Training', percent_argmax_incorrect(offline_trained_spiking_net.predict(mnist.training_set.input), mnist.training_set.target))
        ]
    results['Spiking-MLP with ReLU weights'] = LearningCurveData()
    results['Spiking-MLP with ReLU weights'].add(None, error)
    print 'Spiking-MLP with ReLU weights: %s' % error
    # --------------------------------------------------------------------------

    # See what the score is when we plug the spiking weights into the ReLU net.
    for param, sval in zip(relu_net.parameters, spiking_params):
        param.set_value(sval)
    error = [
        ('Test', percent_argmax_incorrect(relu_net.predict(mnist.test_set.input), mnist.test_set.target)),
        ('Training', percent_argmax_incorrect(relu_net.predict(mnist.training_set.input), mnist.training_set.target))
        ]
    results['ReLU-MLP with Spiking weights'] = LearningCurveData()
    results['ReLU-MLP with Spiking weights'].add(None, error)
    print 'ReLU-MLP with Spiking weights: %s' % error
    # --------------------------------------------------------------------------

    if save_results:
        with open("mnist_relu_vs_spiking_results-%s.pkl" % datetime.now(), 'w') as f:
            pickle.dump(results, f)

    # Problem: this currently includes test
    forward_rates = np.diff(forward_counts) / (np.diff(test_epochs)*60000)
    backward_rates = np.diff(backward_counts) / (np.diff(test_epochs)*60000)

    plt.figure('ReLU vs Spikes')
    plt.subplot(211)
    plot_learning_curves(results, title = "MNIST Learning Curves", hang=False, figure_name='ReLU vs Spikes', xscale='linear', yscale='log', y_title='Percent Error')
    plt.subplot(212)
    plt.plot(test_epochs[1:], forward_rates)
    plt.plot(test_epochs[1:], backward_rates)
    plt.xlabel('Epoch')
    plt.ylabel('n_spikes')
    plt.legend(['Mean Forward Spikes', 'Mean Backward Spikes'], loc='best')
    plt.interactive(is_test_mode())
    plt.show()
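
The spike-rate plot at the end divides the growth in cumulative spike counts between test checkpoints by the number of training samples seen in that interval (60000 per MNIST epoch). A small worked sketch of that arithmetic with made-up counts:

import numpy as np

# Cumulative spike counts recorded at successive test epochs (invented numbers).
test_epochs = np.array([0.0, 0.5, 1.0])
forward_counts = np.array([0.0, 3.0e8, 6.5e8])

# Mean spikes per training sample within each interval.
forward_rates = np.diff(forward_counts) / (np.diff(test_epochs) * 60000)
print(forward_rates)  # -> [10000., 11666.67]
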
Example #19
def demo_mnist_mlp(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        hidden_sizes = [300],
        w_init = 0.01,
        hidden_activation = 'tanh',
        output_activation = 'softmax',
        cost = 'nll-d',
        visualize_params = False,
        n_test_points = 30,
        n_epochs = 10,
        max_training_samples = None,
        use_bias = True,
        onehot = False,
        rng = 1234,
        plot = False,
        ):
    """
    Train an MLP on MNIST and print the test scores as training progresses.
    """

    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples)

    if onehot:
        dataset = dataset.to_onehot()

    if minibatch_size == 'full':
        minibatch_size = dataset.training_set.n_samples

    optimizer = get_named_optimizer(name = optimizer, learning_rate=learning_rate)

    # Setup the training and test functions
    predictor = GradientBasedPredictor(
        function = MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size]+hidden_sizes+[10],
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init = w_init,
            use_bias=use_bias,
            rng = rng,
            ),
        cost_function=cost,
        optimizer=optimizer
        ).compile()  # .compile() turns the GradientBasedPredictor, which works with symbolic variables, into a real one that takes and returns arrays.

    def vis_callback(xx):
        p = predictor.symbolic_predictor._function
        in_layer = {
            'Layer[0].w': p.layers[0].linear_transform._w.get_value().T.reshape(-1, 28, 28),
            'Layer[0].b': p.layers[0].linear_transform._b.get_value(),
            }
        other_layers = [{'Layer[%s].w' % (i+1): l.linear_transform._w.get_value(), 'Layer[%s].b' % (i+1): l.linear_transform._b.get_value()} for i, l in enumerate(p.layers[1:])]
        dbplot(dict(in_layer.items() + sum([o.items() for o in other_layers], [])))

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size,
        test_callback=vis_callback if visualize_params else None
    )

    if plot:
        plot_learning_curves(results)
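
`get_named_optimizer(name, learning_rate)` maps a string such as 'sgd' or 'adamax' onto an optimizer object. A hypothetical sketch of that kind of lookup, assuming `SimpleGradientDescent` and `AdaMax` are imported as in the examples above (the real function presumably supports more names):

def get_named_optimizer_sketch(name, learning_rate):
    # Hypothetical name-to-optimizer lookup.
    return {
        'sgd': lambda: SimpleGradientDescent(eta=learning_rate),
        'adamax': lambda: AdaMax(alpha=learning_rate),
    }[name]()
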
Example #20
def demo_herding_network(kp=.1,
                         kd=1.,
                         kp_back=None,
                         kd_back=None,
                         hidden_sizes=[
                             200,
                         ],
                         n_epochs=50,
                         onehot=False,
                         parallel=False,
                         learning_rate=0.01,
                         dataset='mnist',
                         hidden_activation='relu',
                         adaptive=True,
                         adaptation_rate=0.001,
                         output_activation='softmax',
                         loss='nll',
                         fwd_quantizer='herd',
                         back_quantizer='same',
                         minibatch_size=1,
                         swap_mlp=False,
                         plot=False,
                         test_period=.5,
                         grad_calc='true',
                         rng=1234):

    dataset = get_mnist_dataset(
        flat=True, join_train_and_val=True
    ) if dataset == 'mnist' else get_temporal_mnist_dataset(
        flat=True, join_train_and_val=True)
    if onehot:
        dataset = dataset.to_onehot()
    ws = initialize_network_params(layer_sizes=[28 * 28] + hidden_sizes + [10],
                                   mag='xavier-both',
                                   include_biases=False,
                                   rng=rng)

    if is_test_mode():
        dataset = dataset.shorten(500)
        n_epochs = 0.1
        test_period = 0.03

    if kp_back is None:
        kp_back = kp
    if kd_back is None:
        kd_back = kd
    if back_quantizer == 'same':
        back_quantizer = fwd_quantizer

    if adaptive:
        encdec = lambda: PDAdaptiveEncoderDecoder(kp=kp,
                                                  kd=kd,
                                                  adaptation_rate=
                                                  adaptation_rate,
                                                  quantization=fwd_quantizer)
        encdec_back = lambda: PDAdaptiveEncoderDecoder(
            kp=kp_back,
            kd=kd_back,
            adaptation_rate=adaptation_rate,
            quantization=back_quantizer)
    else:
        encdec = PDEncoderDecoder(kp=kp, kd=kd, quantization=fwd_quantizer)
        encdec_back = PDEncoderDecoder(kp=kp_back,
                                       kd=kd_back,
                                       quantization=back_quantizer)

    if swap_mlp:
        if not parallel:
            assert minibatch_size == 1, "Unfair comparison otherwise, sorry buddy, can't let you do that."
        net = GradientBasedPredictor(
            function=MultiLayerPerceptron.from_weights(
                weights=ws,
                hidden_activations=hidden_activation,
                output_activation=output_activation,
            ),
            cost_function=loss,
            optimizer=GradientDescent(learning_rate),
        )
        prediction_funcs = net.predict.compile()
    else:
        net = PDHerdingNetwork(
            ws=ws,
            encdec=encdec,
            encdec_back=encdec_back,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            optimizer=GradientDescent(learning_rate),
            minibatch_size=minibatch_size if parallel else 1,
            grad_calc=grad_calc,
            loss=loss)
        noise_free_forward_pass = MultiLayerPerceptron.from_weights(
            weights=[layer.w for layer in net.layers],
            biases=[layer.b for layer in net.layers],
            hidden_activations=hidden_activation,
            output_activation=output_activation).compile()
        prediction_funcs = [('noise_free', noise_free_forward_pass),
                            ('herded', net.predict.compile())]

    op_count_info = []

    def test_callback(info, score):
        if plot:
            dbplot(net.layers[0].w.get_value().T.reshape(-1, 28, 28),
                   'w0',
                   cornertext='Epoch {}'.format(info.epoch))
        if swap_mlp:
            all_layer_sizes = [dataset.input_size
                               ] + hidden_sizes + [dataset.target_size]
            fwd_ops = [
                info.sample * d1 * d2
                for d1, d2 in zip(all_layer_sizes[:-1], all_layer_sizes[1:])
            ]
            back_ops = [
                info.sample * d1 * d2
                for d1, d2 in zip(all_layer_sizes[:-1], all_layer_sizes[1:])
            ]
            update_ops = [
                info.sample * d1 * d2
                for d1, d2 in zip(all_layer_sizes[:-1], all_layer_sizes[1:])
            ]
        else:
            fwd_ops = [
                layer_.fwd_op_count.get_value() for layer_ in net.layers
            ]
            back_ops = [
                layer_.back_op_count.get_value() for layer_ in net.layers
            ]
            update_ops = [
                layer_.update_op_count.get_value() for layer_ in net.layers
            ]
        if info.epoch != 0:
            with IndentPrint('Mean Ops by epoch {}'.format(info.epoch)):
                print 'Fwd: {}'.format([
                    si_format(ops / info.epoch,
                              format_str='{value} {prefix}Ops')
                    for ops in fwd_ops
                ])
                print 'Back: {}'.format([
                    si_format(ops / info.epoch,
                              format_str='{value} {prefix}Ops')
                    for ops in back_ops
                ])
                print 'Update: {}'.format([
                    si_format(ops / info.epoch,
                              format_str='{value} {prefix}Ops')
                    for ops in update_ops
                ])
        if info.epoch > max(
                0.5, 2 * test_period) and not swap_mlp and score.get_score(
                    'train', 'noise_free') < 20:
            raise Exception("This horse ain't goin' nowhere.")

        op_count_info.append((info, (fwd_ops, back_ops, update_ops)))

    info_score_pairs = train_and_test_online_predictor(
        dataset=dataset,
        train_fcn=net.train.compile(),
        predict_fcn=prediction_funcs,
        minibatch_size=minibatch_size,
        n_epochs=n_epochs,
        test_epochs=('every', test_period),
        score_measure='percent_argmax_correct',
        test_on='training+test',
        test_callback=test_callback)
    return info_score_pairs, op_count_info
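
In the `swap_mlp` branch of `test_callback`, operation counts are estimated as samples-seen multiplied by the product of consecutive layer sizes (one multiply-accumulate per weight). A tiny worked example of that per-sample estimate for a 784-200-10 network:

all_layer_sizes = [784, 200, 10]
ops_per_sample = [d1 * d2 for d1, d2 in zip(all_layer_sizes[:-1], all_layer_sizes[1:])]
print(ops_per_sample)       # [156800, 2000]
print(sum(ops_per_sample))  # 158800 multiply-accumulates per forward pass
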
Example #21
def demo_mnist_mlp(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        hidden_sizes = [300],
        w_init = 0.01,
        hidden_activation = 'tanh',
        output_activation = 'softmax',
        cost = 'nll-d',
        visualize_params = False,
        n_test_points = 30,
        n_epochs = 10,
        max_training_samples = None,
        use_bias = True,
        onehot = False,
        rng = 1234,
        plot = False,
        ):
    """
    Train an MLP on MNIST and print the test scores as training progresses.
    """

    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples)

    if onehot:
        dataset = dataset.to_onehot()

    if minibatch_size == 'full':
        minibatch_size = dataset.training_set.n_samples

    optimizer = get_named_optimizer(name = optimizer, learning_rate=learning_rate)

    # Setup the training and test functions
    predictor = GradientBasedPredictor(
        function = MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size]+hidden_sizes+[10],
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init = w_init,
            use_bias=use_bias,
            rng = rng,
            ),
        cost_function=cost,
        optimizer=optimizer
        ).compile()  # .compile() turns the GradientBasedPredictor, which works with symbolic variables, into a real one that takes and returns arrays.

    def vis_callback(xx):
        p = predictor.symbolic_predictor._function
        in_layer = {
            'Layer[0].w': p.layers[0].linear_transform._w.get_value().T.reshape(-1, 28, 28),
            'Layer[0].b': p.layers[0].linear_transform._b.get_value(),
            }
        other_layers = [{'Layer[%s].w' % (i+1): l.linear_transform._w.get_value(), 'Layer[%s].b' % (i+1): l.linear_transform._b.get_value()} for i, l in enumerate(p.layers[1:])]
        dbplot(dict(in_layer.items() + sum([o.items() for o in other_layers], [])))

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size,
        test_callback=vis_callback if visualize_params else None
    )

    if plot:
        plot_learning_curves(results)
Example #22
def get_predictor(predictor_type,
                  input_size,
                  target_size,
                  hidden_sizes=[240],
                  output_activation='sigm',
                  hidden_activation='tanh',
                  optimizer='adamax',
                  learning_rate=0.01,
                  noise=1,
                  w_init=0.01,
                  use_bias=True,
                  rng=None):
    """
    Specify parameters that will allow you to construct a predictor

    :param predictor_type: String identifying the predictor class (see below)
    :param input_size: Integer size of the input vector
    :param target_size: Integer size of the target vector
    :param hidden_sizes:
    :param input_activation:
    :param hidden_activation:
    :param optimizer:
    :param learning_rate:
    :return:
    """
    return {
        'MLP':
        lambda: GradientBasedPredictor(
            function=MultiLayerPerceptron.from_init(
                layer_sizes=[input_size] + hidden_sizes + [target_size],
                hidden_activation=hidden_activation,
                output_activation=output_activation,
                use_bias=use_bias,
                w_init=w_init,
                rng=rng),
            cost_function=mean_squared_error,
            optimizer=get_named_optimizer(optimizer, learning_rate),
        ).compile(),
        'DTP':
        lambda: DifferenceTargetMLP.from_initializer(
            input_size=input_size,
            output_size=target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=lambda: get_named_optimizer(
                optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init_mag=w_init,
            noise=noise,
            rng=rng,
            use_bias=use_bias,
        ).compile(),
        'PreAct-DTP':
        lambda: DifferenceTargetMLP.from_initializer(
            input_size=input_size,
            output_size=target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=lambda: get_named_optimizer(
                optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init_mag=w_init,
            noise=noise,
            layer_constructor=PreActivationDifferenceTargetLayer.
            from_initializer,
            rng=rng,
            use_bias=use_bias,
        ).compile(),
        'Linear-DTP':
        lambda: LinearDifferenceTargetMLP.from_initializer(
            input_size=input_size,
            output_size=target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=lambda: get_named_optimizer(
                optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation='linear',
            w_init_mag=w_init,
            noise=noise,
            rng=rng,
            use_bias=use_bias,
            # layer_constructor = LinearDifferenceTargetLayer.from_initializer
        ).compile(),
    }[predictor_type]()