def mnist_adamax_showdown(hidden_size = 300, n_epochs = 10, n_tests = 20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
            function = MultiLayerPerceptron(
                layer_sizes=[hidden_size, dataset.n_categories],
                input_size = dataset.input_size,
                hidden_activation='sig',
                output_activation='lin',
                w_init = normal_w_init(mag = 0.01, seed = 5)
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = optimizer,
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'sgd': make_mlp(SimpleGradientDescent(eta = 0.1)),
            'adamax': make_mlp(AdaMax(alpha = 1e-3)),
            },
        minibatch_size = 20,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
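# For reference, a minimal NumPy sketch of the AdaMax update rule (Kingma & Ba) that the
# AdaMax(alpha=1e-3) optimizer above is compared against plain SGD.  This is the published
# update, not the library's internal code; the small epsilon added to avoid division by zero
# is an assumption.
import numpy as np

def adamax_step(param, grad, m, u, t, alpha=1e-3, beta_1=0.9, beta_2=0.999, eps=1e-8):
    """One AdaMax step: m is the first-moment estimate, u the exponentially-weighted
    infinity norm of the gradient, t the 1-based step count."""
    m = beta_1 * m + (1 - beta_1) * grad
    u = np.maximum(beta_2 * u, np.abs(grad))
    param = param - (alpha / (1 - beta_1 ** t)) * m / (u + eps)
    return param, m, u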
Example #2
    def get_training_fcn(n_gibbs=1,
                         persistent=False,
                         optimizer=SimpleGradientDescent(eta=0.01)):
        @symbolic_updater
        def train(wake_visible):

            wake_hidden = propup(wake_visible)

            persistent_state = sleep_hidden = theano.shared(
                np.zeros(wake_hidden.tag.test_value.shape,
                         dtype=theano.config.floatX),
                name='persistent_hidden_state') if persistent else wake_hidden

            for _ in xrange(n_gibbs):
                sleep_visible = propdown(sleep_hidden)
                sleep_hidden = propup(sleep_visible)

            wake_energy = bridge.free_energy(
                wake_visible) + hidden_layer.free_energy(bridge(wake_visible))
            sleep_energy = bridge.free_energy(
                sleep_visible) + hidden_layer.free_energy(
                    bridge(sleep_visible))
            cost = tt.mean(wake_energy - sleep_energy)

            params = visible_layer.parameters + bridge.parameters + hidden_layer.parameters
            updates = optimizer(cost=cost,
                                parameters=params,
                                constants=[wake_visible, sleep_visible])

            if persistent:
                updates.append((persistent_state, sleep_hidden))

            return updates

        return train
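# A minimal NumPy sketch of the propup/propdown block Gibbs step used symbolically above for a
# Bernoulli-Bernoulli RBM.  The parameter names (w, b_vis, b_hid) are illustrative assumptions,
# not the library's internals.
import numpy as np

def sigmoid(x):
    return 1. / (1. + np.exp(-x))

def gibbs_step(visible, w, b_vis, b_hid, rng=np.random):
    """Sample the hidden layer given the visible layer, then resample the visible layer."""
    hidden_p = sigmoid(visible.dot(w) + b_hid)
    hidden = (rng.rand(*hidden_p.shape) < hidden_p).astype(float)
    visible_p = sigmoid(hidden.dot(w.T) + b_vis)
    visible = (rng.rand(*visible_p.shape) < visible_p).astype(float)
    return visible, hidden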
def compare_example_predictors(
        n_epochs = 5,
        n_tests = 20,
        minibatch_size = 10,
        test_mode = False
    ):
    """
    This demo shows how we can compare different online predictors.  The demo trains each predictor on the dataset,
    returning an object that contains the results.

    :param test_mode: Set this to True to just run the demo quickly (but not to completion) to see that it doesn't break.
    """

    dataset = get_mnist_dataset(flat = True)
    # "Flatten" the 28x28 inputs to a 784-d vector

    if test_mode:
        # Shorten the dataset so we run through it quickly in test mode.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 3

    # Here we compare three predictors on MNIST - an MLP, a Perceptron, and a Random Forest.
    # - The MLP is defined using Plato's interfaces - we create a Symbolic Predictor (GradientBasedPredictor) and
    #   then compile it into an IPredictor object
    # - The Perceptron directly implements the IPredictor interface.
    # - The Random Forest implements SciKit learn's predictor interface - that is, it has a fit(x, y) and a predict(x) method.
    learning_curve_data = compare_predictors(
        dataset = dataset,
        online_predictors = {
            'Perceptron': Perceptron(
                w = np.zeros((dataset.input_size, dataset.n_categories)),
                alpha = 0.001
                ).to_categorical(n_categories = dataset.n_categories),  # .to_categorical allows the perceptron to be trained on integer labels.
            'MLP': GradientBasedPredictor(
                function = MultiLayerPerceptron(
                    layer_sizes=[500, dataset.n_categories],
                    input_size = dataset.input_size,
                    hidden_activation='sig',  # Sigmoidal hidden units
                    output_activation='softmax',  # Softmax output unit, since we're doing multinomial classification
                    w_init = normal_w_init(mag = 0.01, seed = 5)
                ),
                cost_function = negative_log_likelihood_dangerous,  # "Dangerous" because it doesn't check to see that output is normalized, but we know it is because it comes from softmax.
                optimizer = SimpleGradientDescent(eta = 0.1),
                ).compile(),  # .compile() returns an IPredictor
            },
        offline_predictors={
            'RF': RandomForestClassifier(n_estimators = 40)
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct  # Takes the argmax of the one-hot/probability output and compares it to the target label
        )
    # Results is a LearningCurveData object
    return learning_curve_data
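# The 'RF' entry above relies on scikit-learn's predictor interface described in the comments:
# an object with fit(x, y) and predict(x).  A standalone illustration on random data (purely
# a sketch, unrelated to MNIST):
import numpy as np
from sklearn.ensemble import RandomForestClassifier

x_train = np.random.randn(100, 20)
y_train = np.random.randint(0, 3, size=100)
rf = RandomForestClassifier(n_estimators=40)
rf.fit(x_train, y_train)                       # Offline training on the full (x, y) arrays
y_pred = rf.predict(np.random.randn(10, 20))   # Integer class predictions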
def mlp_normalization(hidden_size = 300, n_epochs = 30, n_tests = 50, minibatch_size=20):
    """
    Compare mlps with different schemes for normalizing input.

    regular: Regular vanilla MLP
    normalize: Mean-subtract/normalize over minibatch
    normalize and scale: Mean-subtract/normalize over minibatch AND multiply by a trainable
        (per-unit) scale parameter.

    Conclusions: No significant benefit to scale parameter.  Normalizing gives
    a head start but incurs a small cost later on.  But really all classifiers are quite similar.

    :param hidden_size: Size of hidden layer
    """
    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda normalize, scale: GradientBasedPredictor(
            function = MultiLayerPerceptron(
                layer_sizes=[hidden_size, dataset.n_categories],
                input_size = dataset.input_size,
                hidden_activation='sig',
                output_activation='lin',
                normalize_minibatch=normalize,
                scale_param=scale,
                w_init = normal_w_init(mag = 0.01, seed = 5)
                ),
            cost_function = softmax_negative_log_likelihood,
            optimizer = SimpleGradientDescent(eta = 0.1),
            ).compile()

    return compare_predictors(
        dataset=dataset,
        online_predictors = {
            'regular': make_mlp(normalize = False, scale = False),
            'normalize': make_mlp(normalize=True, scale = False),
            'normalize and scale': make_mlp(normalize=True, scale = True),
            },
        minibatch_size = minibatch_size,
        test_epochs = sqrtspace(0, n_epochs, n_tests),
        evaluation_function = percent_argmax_correct
        )
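# A minimal NumPy sketch of the normalization schemes compared above.  The real
# MultiLayerPerceptron does this symbolically inside the layer; the per-unit 'scale' argument
# here stands in for the trainable scale parameter and is an illustrative assumption.
import numpy as np

def normalize_over_minibatch(x, scale=None, eps=1e-7):
    """Mean-subtract and divide by the per-unit std over the minibatch; optionally apply a
    per-unit scale."""
    x = (x - x.mean(axis=0)) / (x.std(axis=0) + eps)
    return x * scale if scale is not None else x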
Example #5
def test_mlp():

    assert_online_predictor_not_broken(
        predictor_constructor = lambda n_dim_in, n_dim_out:
            GradientBasedPredictor(
                function = MultiLayerPerceptron(
                    layer_sizes = [100, n_dim_out],
                    input_size = n_dim_in,
                    output_activation='softmax',
                    w_init = lambda n_in, n_out, rng = np.random.RandomState(3252): 0.1*rng.randn(n_in, n_out)
                    ),
                cost_function=negative_log_likelihood_dangerous,
                optimizer=SimpleGradientDescent(eta = 0.1),
                ).compile(),
        categorical_target=True,
        minibatch_size=10,
        n_epochs=2
        )
Example #6
def demo_mnist_mlp(test_mode = False):
    """
    Train an MLP on MNIST and print the test scores as training progresses.
    """

    if test_mode:
        test_period = 200
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        test_period = 1000
        minibatch_size = 20
        n_epochs = 10
        dataset = get_mnist_dataset()

    # Setup the training and test functions
    classifier = MultiLayerPerceptron(
        layer_sizes=[500, 10],
        input_size = 784,
        hidden_activation='sig',
        output_activation='softmax',
        w_init = normal_w_init(mag = 0.01)
        )
    training_cost_function = normalized_negative_log_likelihood
    optimizer = SimpleGradientDescent(eta = 0.1)
    training_function = SupervisedTrainingFunction(classifier, training_cost_function, optimizer).compile()
    test_cost_function = percent_correct
    test_function = SupervisedTestFunction(classifier, test_cost_function).compile()

    def report_test(i):
        training_score = test_function(dataset.training_set.input, dataset.training_set.target)
        print 'Training score at iteration %s: %s' % (i, training_score)
        test_score = test_function(dataset.test_set.input, dataset.test_set.target)
        print 'Test score at iteration %s: %s' % (i, test_score)

    # Train and periodically report the test score.
    print 'Running MLP on MNIST Dataset...'
    for i, (_, image_minibatch, label_minibatch) in enumerate(dataset.training_set.minibatch_iterator(minibatch_size = minibatch_size, epochs = n_epochs, single_channel = True)):
        if i % test_period == 0:
            report_test(i)
        training_function(image_minibatch, label_minibatch)
    report_test('Final')
    print '...Done.'
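# A rough NumPy sketch of the negative-log-likelihood cost used above, assuming the network
# output is already a normalized class distribution (it is, since it comes from the softmax
# output layer).  The library's normalized_negative_log_likelihood may differ in details such
# as averaging or epsilon handling.
import numpy as np

def negative_log_likelihood(output_probs, targets, eps=1e-9):
    """Mean -log p(correct class) for an (n_samples, n_classes) probability array and integer targets."""
    return -np.mean(np.log(output_probs[np.arange(len(targets)), targets] + eps))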
Example #7
def test_param_serialization():
    """
    Pros -
    :return:
    """

    dataset = get_synthetic_clusters_dataset()

    predictor_constructor = lambda: GradientBasedPredictor(
        function=MultiLayerPerceptron(layer_sizes=[100, dataset.n_categories],
                                      input_size=dataset.input_shape[0],
                                      output_activation='softmax',
                                      w_init=lambda n_in, n_out, rng=np.random.
                                      RandomState(3252): 0.1 * rng.randn(
                                          n_in, n_out)),
        cost_function=negative_log_likelihood_dangerous,
        optimizer=SimpleGradientDescent(eta=0.1),
    ).compile()

    evaluate = lambda pred: evaluate_predictor(pred, dataset.test_set,
                                               percent_argmax_correct)

    # Train up predictor and save params
    predictor = predictor_constructor()
    pre_training_score = evaluate(predictor)
    assert pre_training_score < 35
    train_online_predictor(predictor,
                           dataset.training_set,
                           minibatch_size=20,
                           n_epochs=3)
    post_training_score = evaluate(predictor)
    assert post_training_score > 95
    trained_param_string = dumps_params(predictor)

    # Instantiate new predictor and load params
    new_predictor = predictor_constructor()
    new_pre_training_score = evaluate(new_predictor)
    assert new_pre_training_score < 35
    loads_params(new_predictor, trained_param_string)
    loaded_score = evaluate(new_predictor)
    assert loaded_score == post_training_score > 95
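# dumps_params/loads_params above are library-specific.  As a generic illustration of the same
# idea (serialize the trained parameter arrays to a string, then restore them into a fresh
# model), a pickle-based sketch with hypothetical helpers:
import pickle
import numpy as np

def dump_param_arrays(param_arrays):
    """Serialize a list of NumPy parameter arrays to a byte string."""
    return pickle.dumps([np.asarray(p) for p in param_arrays])

def load_param_arrays(param_string):
    """Recover the list of parameter arrays from the serialized string."""
    return pickle.loads(param_string)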
Example #8
def demo_dbn_mnist(plot=True, test_mode=True):
    """
    In this demo we greedily train a deep belief net (DBN) on MNIST: first an RBM on the input data (labels ignored),
    then an associative layer which also receives the labels.  We plot the state of a Markov chain that is sampled
    simultaneously with training, and the parameters of the network.
    """

    set_enable_omniscence(True)
    minibatch_size = 20
    dataset = get_mnist_dataset().process_with(
        inputs_processor=lambda (x, ): (x.reshape(x.shape[0], -1), ))
    w_init = lambda n_in, n_out: 0.01 * np.random.randn(n_in, n_out)
    n_training_epochs_1 = 20
    n_training_epochs_2 = 20
    check_period = 300

    if test_mode:
        n_training_epochs_1 = 0.01
        n_training_epochs_2 = 0.01
        check_period = 100

    dbn = DeepBeliefNet(layers={
        'vis': StochasticLayer('bernoulli'),
        'hid': StochasticLayer('bernoulli'),
        'ass': StochasticLayer('bernoulli'),
        'lab': StochasticLayer('bernoulli'),
    },
                        bridges={
                            ('vis', 'hid'):
                            FullyConnectedBridge(w=w_init(784, 500), b_rev=0),
                            ('hid', 'ass'):
                            FullyConnectedBridge(w=w_init(500, 500), b_rev=0),
                            ('lab', 'ass'):
                            FullyConnectedBridge(w=w_init(10, 500), b_rev=0)
                        })

    # Compile the functions we're going to use.
    train_first_layer = dbn.get_constrastive_divergence_function(
        visible_layers='vis',
        hidden_layers='hid',
        optimizer=SimpleGradientDescent(eta=0.01),
        n_gibbs=1,
        persistent=True).compile()
    free_energy_of_first_layer = dbn.get_free_energy_function(
        visible_layers='vis', hidden_layers='hid').compile()
    train_second_layer = dbn.get_constrastive_divergence_function(
        visible_layers=('hid', 'lab'),
        hidden_layers='ass',
        input_layers=('vis', 'lab'),
        n_gibbs=1,
        persistent=True).compile()
    predict_label = dbn.get_inference_function(input_layers='vis',
                                               output_layers='lab',
                                               path=[('vis', 'hid'),
                                                     ('hid', 'ass'),
                                                     ('ass', 'lab')],
                                               smooth=True).compile()

    encode_label = OneHotEncoding(n_classes=10)

    # Step 1: Train the first layer, plotting the weights and persistent chain state.
    if plot:
        train_first_layer.set_debug_variables(
            lambda: {
                'weights':
                dbn._bridges['vis', 'hid']._w.T.reshape((-1, 28, 28)),
                'smooth_vis_state':
                dbn.get_inference_function('hid', 'vis', smooth=True).
                symbolic_stateless(*train_first_layer.locals()[
                    'initial_hidden']).reshape((-1, 28, 28))
            })
        plotter = LiveStream(train_first_layer.get_debug_values)

    for i, (n_samples, visible_data, label_data) in enumerate(
            dataset.training_set.minibatch_iterator(
                minibatch_size=minibatch_size,
                epochs=n_training_epochs_1,
                single_channel=True)):
        train_first_layer(visible_data)
        if i % check_period == 0:
            print 'Free Energy of Test Data: %s' % (free_energy_of_first_layer(
                dataset.test_set.input).mean())
            if plot:
                plotter.update()

    # Step 2: Train the second layer and simultaneously compute the classification error from forward passes.
    if plot:
        train_second_layer.set_debug_variables(
            lambda: {
                'w_vis_hid':
                dbn._bridges['vis', 'hid']._w.T.reshape((-1, 28, 28)),
                'w_hid_ass':
                dbn._bridges['hid', 'ass']._w,
                'w_lab_ass':
                dbn._bridges['lab', 'ass']._w,
                'associative_state':
                train_second_layer.locals()['sleep_hidden'][0].reshape(
                    (-1, 20, 25)),
                'hidden_state':
                train_second_layer.locals()['sleep_visible'][0].reshape(
                    (-1, 20, 25)),
                'smooth_vis_state':
                dbn.get_inference_function('hid', 'vis', smooth=True).
                symbolic_stateless(train_second_layer.locals()['sleep_visible']
                                   [0]).reshape((-1, 28, 28))
            })
        plotter = LiveStream(train_second_layer.get_debug_values)

    for i, (n_samples, visible_data, label_data) in enumerate(
            dataset.training_set.minibatch_iterator(
                minibatch_size=minibatch_size,
                epochs=n_training_epochs_2,
                single_channel=True)):
        train_second_layer(visible_data, encode_label(label_data))
        if i % check_period == 0:
            out, = predict_label(dataset.test_set.input)
            score = percent_argmax_correct(actual=out,
                                           target=dataset.test_set.target)
            print 'Classification Score: %s' % score
            if plot:
                plotter.update()
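# encode_label = OneHotEncoding(n_classes=10) above maps integer labels to one-hot rows so the
# 'lab' layer receives a binary vector.  A minimal NumPy equivalent (illustrative, not the
# library's class):
import numpy as np

def one_hot(labels, n_classes=10):
    """Map an (n,) array of integer labels to an (n, n_classes) one-hot float matrix."""
    out = np.zeros((len(labels), n_classes))
    out[np.arange(len(labels)), labels] = 1.
    return out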
Example #9
    def get_constrastive_divergence_function(self, visible_layers, hidden_layers, input_layers = None, up_path = None, n_gibbs = 1, persistent = False,
            optimizer = SimpleGradientDescent(eta = 0.1)):
        """
        Make a symbolic function that does one step of contrastive divergence given a minibatch of input data.
        :param visible_layers: The visible layers of the RBM to be trained
        :param hidden_layers: The hidden layers of the RBM to be trained
        :param input_layers: The input layers (if not the same as the visible), whose activations will have to be passed
            up to the visible layers before training.
        :param up_path: The path from the input_layers to the hidden_layers (in the future this should be found
            automatically - now it is only computed automatically if there's a direct connection from input to visible)
        :param n_gibbs: Number of Gibbs block sampling steps to do
        :param persistent: True for persistent contrastive divergence (PCD), False for regular CD
        :param optimizer: An IGradientOptimizer object.
        :return: A symbolic function of update form:
            [(param_0, new_param_0), ..., (persistent_state_0, new_persistent_state_0), ...] = func(in_0, in_1, ...)
            That updates parameters in the specified RBM, and persistent state if persistent=True.
        """

        visible_layers = visible_layers if isinstance(visible_layers, (list, tuple)) else (visible_layers, )
        hidden_layers = hidden_layers if isinstance(hidden_layers, (list, tuple)) else (hidden_layers, )
        if input_layers is None:
            assert set(visible_layers).issubset(self._graph.get_input_variables()), "If you don't specify input layers, "\
                "the visible layers must be inputs to the graph.  But they are not.  Visible layers: %s, Input layers: %s" \
                % (visible_layers, self._graph.get_input_variables().keys())

        elif up_path is None:
            up_path = self.get_inference_function(input_layers = input_layers, output_layers = visible_layers)
        else:
            up_path = self._graph.get_execution_path(up_path)

        propup = self.get_inference_function(visible_layers, hidden_layers)
        free_energy = self.get_free_energy_function(visible_layers, hidden_layers)

        @symbolic_updater
        def cd_function(*input_signals):

            if input_layers is None:
                wake_visible = input_signals
            else:
                wake_visible, _ = up_path(*input_signals)
            wake_hidden, _ = propup(*wake_visible)

            initial_hidden = [theano.shared(np.zeros(wh.tag.test_value.shape, dtype = theano.config.floatX), name = 'persistent_hidden_state') for wh in wake_hidden] \
                if persistent else wake_hidden

            gibbs_path = [(hidden_layers, visible_layers)] + [(visible_layers, hidden_layers), (hidden_layers, visible_layers)] * (n_gibbs-1)
            sleep_visible, _ = self.get_inference_function(hidden_layers, visible_layers, gibbs_path)(*initial_hidden)
            sleep_hidden, _ = propup(*sleep_visible)

            free_energy_difference = free_energy(*wake_visible).mean() - free_energy(*sleep_visible).mean()

            all_params = sum([x.parameters for x in ([self._layers[i] for i in visible_layers]
                +[self._layers[i] for i in hidden_layers]+[self._bridges[i, j] for i in visible_layers for j in hidden_layers])], [])

            updates = optimizer(cost = free_energy_difference, parameters = all_params, constants = wake_visible+sleep_visible)

            if persistent:
                updates += [(p, s) for p, s in zip(initial_hidden, sleep_hidden)]

            return updates

        return cd_function
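# The cost above is the mean free-energy difference between the data ('wake') and model
# ('sleep') visible states.  For a Bernoulli-Bernoulli RBM the free energy has the standard
# closed form sketched below in NumPy; parameter names are illustrative, and the library
# computes this symbolically via the bridge/layer free_energy methods.
import numpy as np

def bernoulli_rbm_free_energy(visible, w, b_vis, b_hid):
    """F(v) = -v.b_vis - sum_j log(1 + exp(v.W_j + b_hid_j)), one value per row of visible."""
    return -visible.dot(b_vis) - np.sum(np.log1p(np.exp(visible.dot(w) + b_hid)), axis=1)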
Example #10
def demo_rbm_mnist(plot=True, test_mode=False):
    """
    In this demo we train an RBM on the MNIST input data (labels are ignored).  We plot the state of a Markov chain
    that is being simultaneously sampled from the RBM, and the parameters of the RBM.

    What you see:
    A plot will appear with 6 subplots.  The subplots are as follows:
    hidden-neg-chain: The activity of the hidden layer for each of the persistent CD chains used for drawing negative samples.
    visible-neg-chain: The probabilities of the visible activations corresponding to the state of hidden-neg-chain.
    w: A subset of the weight vectors, reshaped to the shape of the input.
    b: The bias of the hidden units.
    b_rev: The bias of the visible units.
    visible-sample: The probabilities of the visible samples drawn from an independent free-sampling chain (outside the
        training function).

    As learning progresses, visible-neg-chain and visible-sample should increasingly resemble the data.
    """
    set_enable_omniscence(True)
    minibatch_size = 9
    n_epochs = 0.01 if test_mode else 10

    dataset = get_mnist_dataset().process_with(
        inputs_processor=lambda (x, ): (x.reshape(x.shape[0], -1), ))

    rbm = simple_rbm(
        visible_layer=StochasticLayer('bernoulli'),
        bridge=FullyConnectedBridge(
            w=0.001 *
            np.random.randn(28 * 28, 500).astype(theano.config.floatX),
            b=0,
            b_rev=0),
        hidden_layer=StochasticLayer('bernoulli'))

    train_function = rbm.get_training_fcn(
        n_gibbs=4, persistent=True,
        optimizer=SimpleGradientDescent(eta=0.01)).compile()
    sampling_function = rbm.get_free_sampling_fcn(
        init_visible_state=np.random.randn(9, 28 * 28),
        return_smooth_visible=True).compile()

    if plot:

        def debug_variable_setter():
            lv = train_function.symbolic.locals()
            return {
                'hidden-neg-chain':
                lv.sleep_hidden.reshape((-1, 25, 20)),
                'visible-neg-chain':
                lv.hidden_layer.smooth(lv.bridge.reverse(
                    lv.sleep_hidden)).reshape((-1, 28, 28)),
                'w':
                lv.bridge.parameters[0].T[:25].reshape((-1, 28, 28)),
                'b':
                lv.bridge.parameters[1].reshape((25, 20)),
                'b_rev':
                lv.bridge.parameters[2].reshape((28, 28)),
            }

        train_function.set_debug_variables(debug_variable_setter)

    stream = LiveStream(lambda: dict(train_function.get_debug_values().items(
    ) + [('visible-sample', visible_samples.reshape((-1, 28, 28)))]),
                        update_every=10)
    for _, visible_data, _ in dataset.training_set.minibatch_iterator(
            minibatch_size=minibatch_size, epochs=n_epochs,
            single_channel=True):
        visible_samples, _ = sampling_function()
        train_function(visible_data)
        if plot:
            stream.update()
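# The 'visible-sample' panel described in the docstring comes from an independent free-sampling
# chain: start from noise, Gibbs-sample repeatedly, and display the smooth (probability) visible
# state.  A NumPy sketch of that idea with illustrative parameter names, not the library's
# get_free_sampling_fcn:
import numpy as np

def free_sample_chain(w, b_vis, b_hid, n_chains=9, n_steps=100, rng=np.random):
    """Run independent Gibbs chains from random starting states and return the smooth visible probabilities."""
    sigmoid = lambda x: 1. / (1. + np.exp(-x))
    visible_p = visible = (rng.rand(n_chains, w.shape[0]) < 0.5).astype(float)
    for _ in range(n_steps):
        hidden = (rng.rand(n_chains, w.shape[1]) < sigmoid(visible.dot(w) + b_hid)).astype(float)
        visible_p = sigmoid(hidden.dot(w.T) + b_vis)
        visible = (rng.rand(n_chains, w.shape[0]) < visible_p).astype(float)
    return visible_p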