Example #1
def demo_compare_dtp_optimizers(
    hidden_sizes=[240],
    n_epochs=10,
    minibatch_size=20,
    n_tests=20,
    hidden_activation='tanh',
):

    dataset = get_mnist_dataset(flat=True).to_onehot()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 2

    def make_dtp_net(optimizer_constructor, output_fcn):
        return DifferenceTargetMLP.from_initializer(
            input_size=dataset.input_size,
            output_size=dataset.target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=optimizer_constructor,
            input_activation='sigm',
            hidden_activation=hidden_activation,
            output_activation=output_fcn,
            w_init_mag=0.01,
            noise=1,
        ).compile()

    learning_curves = compare_predictors(
        dataset=dataset,
        online_predictors={
            'SGD-0.001-softmax':
            make_dtp_net(lambda: SimpleGradientDescent(0.001),
                         output_fcn='softmax'),
            'AdaMax-0.001-softmax':
            make_dtp_net(lambda: AdaMax(0.001), output_fcn='softmax'),
            'RMSProp-0.001-softmax':
            make_dtp_net(lambda: RMSProp(0.001), output_fcn='softmax'),
            'SGD-0.001-sigm':
            make_dtp_net(lambda: SimpleGradientDescent(0.001),
                         output_fcn='sigm'),
            'AdaMax-0.001-sigm':
            make_dtp_net(lambda: AdaMax(0.001), output_fcn='sigm'),
            'RMSProp-0.001-sigm':
            make_dtp_net(lambda: RMSProp(0.001), output_fcn='sigm'),
        },
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct,
    )

    plot_learning_curves(learning_curves)
Example #2
    def get_training_fcn(n_gibbs=1,
                         persistent=False,
                         optimizer=SimpleGradientDescent(eta=0.01)):
        @symbolic_updater
        def train(wake_visible):

            wake_hidden = propup(wake_visible)

            persistent_state = sleep_hidden = theano.shared(
                np.zeros(wake_hidden.tag.test_value.shape,
                         dtype=theano.config.floatX),
                name='persistend_hidden_state') if persistent else wake_hidden

            for _ in xrange(n_gibbs):
                sleep_visible = propdown(sleep_hidden)
                sleep_hidden = propup(sleep_visible)

            wake_energy = bridge.free_energy(
                wake_visible) + hidden_layer.free_energy(bridge(wake_visible))
            sleep_energy = bridge.free_energy(
                sleep_visible) + hidden_layer.free_energy(
                    bridge(sleep_visible))
            cost = tt.mean(wake_energy - sleep_energy)

            params = visible_layer.parameters + bridge.parameters + hidden_layer.parameters
            optimizer(cost=cost,
                      parameters=params,
                      constants=[wake_visible, sleep_visible])

            if persistent:
                add_update(persistent_state, sleep_hidden)

        return train
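The cost above is the mean difference between the wake and sleep free energies, with the wake and sleep visible states passed as constants so that gradients flow only into the parameters; with persistent=True, the final sleep hidden state is carried over between calls (persistent contrastive divergence). A usage sketch, reusing the call pattern of Example #6 below (the rbm, data, minibatch_iterate and SimpleGradientDescent names are taken from that demo):

# Usage sketch: names are borrowed from Example #6 below.
train_step = rbm.get_training_fcn(
    n_gibbs=1, persistent=True,
    optimizer=SimpleGradientDescent(eta=0.01)).compile()
for visible_minibatch in minibatch_iterate(data, minibatch_size=20, n_epochs=5):
    train_step(visible_minibatch)  # one step of (persistent) contrastive divergence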
Example #3
def mnist_adamax_showdown(hidden_size=300, n_epochs=10, n_tests=20):

    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda optimizer: GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(layer_sizes=[
            dataset.input_size, hidden_size, dataset.n_categories
        ],
                                                hidden_activation='sig',
                                                output_activation='lin',
                                                w_init=0.01,
                                                rng=5),
        cost_function=softmax_negative_log_likelihood,
        optimizer=optimizer,
    ).compile()

    return compare_predictors(dataset=dataset,
                              online_predictors={
                                  'sgd':
                                  make_mlp(SimpleGradientDescent(eta=0.1)),
                                  'adamax': make_mlp(AdaMax(alpha=1e-3)),
                              },
                              minibatch_size=20,
                              test_epochs=sqrtspace(0, n_epochs, n_tests),
                              evaluation_function=percent_argmax_correct)
Example #4
def test_symbolic_predicors():
    """
    This test is meant to serve as both a test and a tutorial for how to use a symbolic predictor.
    It shows how to construct a symbolic predictor using a function, cost function, and optimizer.
    It then trains this predictor on a synthetic toy dataset and demonstrates that it has learned.
    """

    dataset = get_synthetic_clusters_dataset()

    symbolic_predictor = GradientBasedPredictor(
        function = MultiLayerPerceptron.from_init(
            layer_sizes = [dataset.input_size, 100, dataset.n_categories],
            output_activation='softmax',
            w_init = 0.1,
            rng = 3252
            ),
        cost_function=negative_log_likelihood_dangerous,
        optimizer=SimpleGradientDescent(eta = 0.1),
        )

    predictor = symbolic_predictor.compile()
    # .compile() turns the symbolic predictor into an IPredictor object, which can be called with numpy arrays.

    init_score = percent_argmax_correct(predictor.predict(dataset.test_set.input), dataset.test_set.target)
    for x_m, y_m in zip_minibatch_iterate([dataset.training_set.input, dataset.training_set.target], minibatch_size=10, n_epochs=20):
        predictor.train(x_m, y_m)
    final_score = percent_argmax_correct(predictor.predict(dataset.test_set.input), dataset.test_set.target)

    print 'Initial score: %s%%.  Final score: %s%%' % (init_score, final_score)
    assert init_score < 30
    assert final_score > 98
Example #5
    def __init__(self,
                 w,
                 b,
                 w_rev,
                 b_rev,
                 backward_activation='tanh',
                 forward_activation='tanh',
                 rng=None,
                 noise=1,
                 optimizer_constructor=lambda: SimpleGradientDescent(0.01),
                 cost_function=mean_squared_error,
                 use_bias=True):

        self.noise = noise
        self.rng = get_theano_rng(rng)
        self.w = theano.shared(w, name='w')
        self.b = theano.shared(b, name='b')
        self.w_rev = theano.shared(w_rev, name='w_rev')
        self.b_rev = theano.shared(b_rev, name='b_rev')
        self.backward_activation = get_named_activation_function(
            backward_activation) if backward_activation is not None else None
        self.forward_activation = get_named_activation_function(
            forward_activation)
        self.forward_optimizer = optimizer_constructor()
        self.backward_optimizer = optimizer_constructor()
        self.cost_function = cost_function
        self.use_bias = use_bias
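The constructor stores a forward pair (w, b), a reverse pair (w_rev, b_rev) and a noise level used when training the inverse mapping; these are the ingredients of a difference target propagation layer. As an illustration only (not necessarily this class's exact code), the standard difference-correction rule of Lee et al. (2015) built from these pieces looks like this, with f and g standing in for the forward and backward activation functions:

# Illustrative sketch of the standard difference target propagation rule;
# the function name and signature here are hypothetical.
def backpropagate_target(x, h_target, f, g, w, b, w_rev, b_rev):
    # Forward activation of this layer.
    h = f(x.dot(w) + b)
    reconstruct = lambda hidden: g(hidden.dot(w_rev) + b_rev)
    # Difference correction: pass the target down, but cancel the reconstruction
    # error that the backward mapping makes on the layer's own forward output.
    return x + reconstruct(h_target) - reconstruct(h)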
Example #6
def demo_rbm_mnist(
        vis_activation = 'bernoulli',
        hid_activation = 'bernoulli',
        n_hidden = 500,
        plot = True,
        eta = 0.01,
        optimizer = 'sgd',
        w_init_mag = 0.001,
        minibatch_size = 9,
        persistent = False,
        n_epochs = 100,
        plot_interval = 100,
        ):
    """
    In this demo we train an RBM on the MNIST input data (labels are ignored).  We plot the state of a Markov chain
    that is being simultaneously sampled from the RBM, and the parameters of the RBM.

    What you see:
    A plot will appear with 6 subplots.  The subplots are as follows:
    hidden-neg-chain: The activity of the hidden layer for each of the persistent CD chains used for drawing negative samples.
    visible-neg-chain: The probabilities of the visible activations corresponding to the state of hidden-neg-chain.
    w: A subset of the weight vectors, reshaped to the shape of the input.
    b: The bias of the hidden units.
    b_rev: The bias of the visible units.
    visible-sample: The probabilities of the visible samples drawn from an independent free-sampling chain (outside the
        training function).

    As learning progresses, visible-neg-chain and visible-sample should increasingly resemble the data.
    """
    with EnableOmniscence():
        # EnableOmniscence allows us to plot internal variables (by referencing the .locals() attribute of a symbolic function; see plot_fcn below)

        if is_test_mode():
            n_epochs = 0.01

        data = get_mnist_dataset(flat = True).training_set.input

        rbm = simple_rbm(
            visible_layer = StochasticNonlinearity(vis_activation),
            bridge=FullyConnectedBridge(w = w_init_mag*np.random.randn(28*28, n_hidden).astype(theano.config.floatX), b=0, b_rev = 0),
            hidden_layer = StochasticNonlinearity(hid_activation)
            )

        optimizer = \
            SimpleGradientDescent(eta = eta) if optimizer == 'sgd' else \
            AdaMax(alpha=eta) if optimizer == 'adamax' else \
            bad_value(optimizer)

        train_function = rbm.get_training_fcn(n_gibbs = 1, persistent = persistent, optimizer = optimizer).compile()

        def plot_fcn():
            lv = train_function.locals()
            dbplot(lv['wake_visible'].reshape((-1, 28, 28)), 'visible-pos-chain')
            dbplot(lv['sleep_visible'].reshape((-1, 28, 28)), 'visible-neg-chain')

        for i, visible_data in enumerate(minibatch_iterate(data, minibatch_size=minibatch_size, n_epochs=n_epochs)):
            train_function(visible_data)
            if plot and i % plot_interval == 0:
                plot_fcn()
Example #7
def compare_example_predictors(
    n_epochs=5,
    n_tests=20,
    minibatch_size=10,
):
    """
    This demo shows how we can compare different online predictors.  The demo trains each predictor on the dataset
    and returns an object that contains the results.

    When is_test_mode() is True, the demo is just run quickly (but not to completion) to check that it doesn't break.
    """

    dataset = get_mnist_dataset(flat=True)
    # "Flatten" the 28x28 inputs to a 784-d vector

    if is_test_mode():
        # Shorten the dataset so we run through it quickly in test mode.
        dataset = dataset.shorten(200)
        n_epochs = 1
        n_tests = 3

    # Here we compare three predictors on MNIST - an MLP, a Perceptron, and a Random Forest.
    # - The MLP is defined using Plato's interfaces - we create a Symbolic Predictor (GradientBasedPredictor) and
    #   then compile it into an IPredictor object
    # - The Perceptron directly implements the IPredictor interface.
    # - The Random Forest implements SciKit learn's predictor interface - that is, it has a fit(x, y) and a predict(x) method.
    learning_curve_data = compare_predictors(
        dataset=dataset,
        online_predictors={
            'Perceptron':
            Perceptron(w=np.zeros((dataset.input_size, dataset.n_categories)),
                       alpha=0.001).
            to_categorical(
                n_categories=dataset.n_categories
            ),  # .to_categorical allows the perceptron to be trained on integer labels.
            'MLP':
            GradientBasedPredictor(
                function=MultiLayerPerceptron.from_init(
                    layer_sizes=[
                        dataset.input_size, 500, dataset.n_categories
                    ],
                    hidden_activation='sig',  # Sigmoidal hidden units
                    output_activation=
                    'softmax',  # Softmax output unit, since we're doing multinomial classification
                    w_init=0.01,
                    rng=5),
                cost_function=
                negative_log_likelihood_dangerous,  # "Dangerous" because it doesn't check to see that output is normalized, but we know it is because it comes from softmax.
                optimizer=SimpleGradientDescent(eta=0.1),
            ).compile(),  # .compile() returns an IPredictor
        },
        offline_predictors={'RF': RandomForestClassifier(n_estimators=40)},
        minibatch_size=minibatch_size,
        test_epochs=sqrtspace(0, n_epochs, n_tests),
        evaluation_function=percent_argmax_correct  # Compares one-hot
    )
    # Results is a LearningCurveData object
    return learning_curve_data
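A minimal way to run this demo and plot the returned LearningCurveData, reusing plot_learning_curves exactly as Example #1 does (assuming the same demo imports as above are available):

if __name__ == '__main__':
    # Run the comparison and plot the resulting learning curves, as in Example #1.
    records = compare_example_predictors(n_epochs=5, n_tests=20, minibatch_size=10)
    plot_learning_curves(records)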
Example #8
def mlp_normalization(hidden_size=300,
                      n_epochs=30,
                      n_tests=50,
                      minibatch_size=20):
    """
    Compare mlp with different schemes for normalizing input.

    regular: Regular vanilla MLP
    normalize: Mean-subtract/normalize over minibatch
    normalize and scale: Mean-subtract/normalize over minibatch AND multiply by a trainable
        (per-unit) scale parameter.

    Conclusions: No significant benefit to scale parameter.  Normalizing gives
    a head start but incurs a small cost later on.  But really all classifiers are quite similar.

    :param hidden_size: Size of hidden layer
    """
    dataset = get_mnist_dataset()

    if is_test_mode():
        dataset = dataset.shorten(200)
        n_epochs = 0.1
        n_tests = 3

    make_mlp = lambda normalize, scale: GradientBasedPredictor(
        function=MultiLayerPerceptron.from_init(layer_sizes=[
            dataset.input_size, hidden_size, dataset.n_categories
        ],
                                                hidden_activation='sig',
                                                output_activation='lin',
                                                normalize_minibatch=normalize,
                                                scale_param=scale,
                                                w_init=0.01,
                                                rng=5),
        cost_function=softmax_negative_log_likelihood,
        optimizer=SimpleGradientDescent(eta=0.1),
    ).compile()

    return compare_predictors(dataset=dataset,
                              online_predictors={
                                  'regular':
                                  make_mlp(normalize=False, scale=False),
                                  'normalize':
                                  make_mlp(normalize=True, scale=False),
                                  'normalize and scale':
                                  make_mlp(normalize=True, scale=True),
                              },
                              minibatch_size=minibatch_size,
                              test_epochs=sqrtspace(0, n_epochs, n_tests),
                              evaluation_function=percent_argmax_correct)
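To make the three variants concrete, here is an illustrative numpy-only sketch of what "mean-subtract/normalize over the minibatch" with an optional per-unit scale amounts to; it is not the library's internal implementation of normalize_minibatch or scale_param.

import numpy as np

def normalize_over_minibatch(pre_activation, scale=None, eps=1e-7):
    # Mean-subtract and variance-normalize each unit across the minibatch axis.
    normed = (pre_activation - pre_activation.mean(axis=0)) / (pre_activation.std(axis=0) + eps)
    # Optionally multiply by a trainable per-unit scale parameter.
    return normed * scale if scale is not None else normed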
Example #9
def test_difference_target_mlp():

    assert_online_predictor_not_broken(
        predictor_constructor=lambda n_in, n_out: DifferenceTargetMLP.
        from_initializer(input_size=n_in,
                         output_size=n_out,
                         hidden_sizes=[50],
                         optimizer_constructor=lambda: SimpleGradientDescent(
                             0.01),
                         w_init_mag=0.01,
                         rng=1234).compile(),
        minibatch_size=10,
        n_epochs=10,
    )
Example #10
def test_mlp():

    assert_online_predictor_not_broken(
        predictor_constructor=lambda n_dim_in, n_dim_out:
        GradientBasedPredictor(
            function=MultiLayerPerceptron.from_init(
                layer_sizes=[n_dim_in, 100, n_dim_out],
                output_activation='softmax',
                w_init=0.1,
                rng=3252),
            cost_function=negative_log_likelihood_dangerous,
            optimizer=SimpleGradientDescent(eta=0.1),
        ).compile(),
        categorical_target=True,
        minibatch_size=10,
        n_epochs=2)
Example #11
def test_maxout_mlp():

    assert_online_predictor_not_broken(
        predictor_constructor=lambda n_dim_in, n_dim_out:
        GradientBasedPredictor(
            function=create_maxout_network(
                layer_sizes=[n_dim_in, 100, n_dim_out],
                maxout_widths=4,
                output_activation='softmax',
                w_init=0.01,
                rng=1234,
            ),
            cost_function=negative_log_likelihood_dangerous,
            optimizer=SimpleGradientDescent(eta=0.1),
        ).compile(),
        categorical_target=True,
        minibatch_size=10,
        n_epochs=2)
Example #12
    def __init__(self,
                 input_size,
                 output_size,
                 regressor_type='multinomial',
                 optimizer=SimpleGradientDescent(eta=0.01),
                 include_biases=True):

        self.w = theano.shared(
            np.zeros((input_size, output_size), dtype=theano.config.floatX))
        self.b = theano.shared(
            np.zeros(output_size, dtype=theano.config.floatX))
        self.optimizer = optimizer
        self.activation, self.cost_fcn = {
            'multinomial':
            (tt.nnet.softmax, negative_log_likelihood_dangerous),
            'logistic': (tt.nnet.sigmoid, mean_xe),
            'linear': (lambda x: x, mean_squared_error)
        }[regressor_type]
        self.include_biases = include_biases
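The regressor_type lookup pairs an output nonlinearity with a matching cost function. The forward pass is not shown in this excerpt; as a hypothetical sketch (not this class's actual code), these attributes would typically combine as follows:

# Hypothetical sketch of how the attributes initialized above would combine
# into a prediction and a training cost.
def sketch_forward_and_cost(regressor, x, target):
    pre_activation = x.dot(regressor.w) + (regressor.b if regressor.include_biases else 0)
    output = regressor.activation(pre_activation)   # softmax / sigmoid / identity
    cost = regressor.cost_fcn(output, target)       # matching NLL / cross-entropy / MSE cost
    return output, cost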
Example #13
    def get_training_fcn(self, n_gibbs=1, persistent = False, optimizer = SimpleGradientDescent(eta = 0.01)):

        @symbolic_updater
        def train(wake_visible):

            wake_hidden = self.propup(wake_visible)
            persistent_state = sleep_hidden = create_shared_variable(np.zeros(wake_hidden.tag.test_value.shape),
                name = 'persistend_hidden_state') if persistent else wake_hidden
            for _ in xrange(n_gibbs):
                sleep_visible = self.propdown(sleep_hidden)
                sleep_hidden = self.propup(sleep_visible)
            wake_energy = self.energy(wake_visible)
            sleep_energy = self.energy(sleep_visible)
            cost = wake_energy - sleep_energy
            optimizer(cost = cost, parameters = self.parameters, constants = [wake_visible, sleep_visible])
            if persistent:
                add_update(persistent_state, sleep_hidden)

        return train
Example #14
def test_bare_bones_mlp(seed=1234):
    """
    This verifies that the MLP works.  It's intentionally not using any wrappers on top of MLP to show its "bare bones"
    usage.  Wrapping in GradientBasedPredictor can simplify usage - see test_symbolic_predictors.
    """

    dataset = get_synthetic_clusters_dataset()

    mlp = MultiLayerPerceptron.from_init(
        layer_sizes=[dataset.input_size, 20, dataset.n_categories],
        hidden_activation='relu',
        output_activation='softmax',
        w_init=0.01,
        rng=seed)

    fwd_fcn = mlp.compile()

    optimizer = SimpleGradientDescent(eta=0.1)

    @symbolic_updater
    def train(x, y):
        output = mlp(x)
        cost = negative_log_likelihood_dangerous(output, y)
        optimizer(cost, mlp.parameters)

    train_fcn = train.compile()

    init_score = percent_argmax_correct(fwd_fcn(dataset.test_set.input),
                                        dataset.test_set.target)

    for x_m, y_m in zip_minibatch_iterate(
        [dataset.training_set.input, dataset.training_set.target],
            minibatch_size=10,
            n_epochs=20):
        train_fcn(x_m, y_m)

    final_score = percent_argmax_correct(fwd_fcn(dataset.test_set.input),
                                         dataset.test_set.target)
    print 'Initial score: %s%%.  Final score: %s%%' % (init_score, final_score)
    assert init_score < 30
    assert final_score > 98
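For contrast with the bare-bones setup above, the wrapped form used in Examples #4 and #10 folds the same pieces into a GradientBasedPredictor; a sketch assuming the same dataset and seed as this test:

# Wrapped equivalent of the bare-bones setup above (assumes the same dataset and seed).
predictor = GradientBasedPredictor(
    function=MultiLayerPerceptron.from_init(
        layer_sizes=[dataset.input_size, 20, dataset.n_categories],
        hidden_activation='relu',
        output_activation='softmax',
        w_init=0.01,
        rng=seed),
    cost_function=negative_log_likelihood_dangerous,
    optimizer=SimpleGradientDescent(eta=0.1),
).compile()
# predictor.train(x_m, y_m) and predictor.predict(x) then replace train_fcn and fwd_fcn.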
Example #15
def demo_dbn_mnist(plot=True):
    """
    In this demo we train a DBN on MNIST, layer by layer.  We plot the weights and the state of a Markov chain
    that is being simultaneously sampled from the model, along with the classification score of the forward pass.
    """

    minibatch_size = 20
    dataset = get_mnist_dataset().process_with(
        inputs_processor=lambda (x, ): (x.reshape(x.shape[0], -1), ))
    w_init = lambda n_in, n_out: 0.01 * np.random.randn(n_in, n_out)
    n_training_epochs_1 = 20
    n_training_epochs_2 = 20
    check_period = 300

    with EnableOmniscence():
        # This construction allows us to access internal variables for plotting purposes.  When you
        # call fcn.locals() on a compiled symbolic function you can get the values of its internal variables.

        if is_test_mode():
            n_training_epochs_1 = 0.01
            n_training_epochs_2 = 0.01
            check_period = 100

        dbn = DeepBeliefNet(layers={
            'vis': StochasticNonlinearity('bernoulli'),
            'hid': StochasticNonlinearity('bernoulli'),
            'ass': StochasticNonlinearity('bernoulli'),
            'lab': StochasticNonlinearity('bernoulli'),
        },
                            bridges={
                                ('vis', 'hid'):
                                FullyConnectedBridge(w=w_init(784, 500),
                                                     b_rev=0),
                                ('hid', 'ass'):
                                FullyConnectedBridge(w=w_init(500, 500),
                                                     b_rev=0),
                                ('lab', 'ass'):
                                FullyConnectedBridge(w=w_init(10, 500),
                                                     b_rev=0)
                            })

        # Compile the functions you're gonna use.
        train_first_layer = dbn.get_constrastive_divergence_function(
            visible_layers='vis',
            hidden_layers='hid',
            optimizer=SimpleGradientDescent(eta=0.01),
            n_gibbs=1,
            persistent=True).compile()
        free_energy_of_first_layer = dbn.get_free_energy_function(
            visible_layers='vis', hidden_layers='hid').compile()
        train_second_layer = dbn.get_constrastive_divergence_function(
            visible_layers=('hid', 'lab'),
            hidden_layers='ass',
            input_layers=('vis', 'lab'),
            n_gibbs=1,
            persistent=True).compile()
        predict_label = dbn.get_inference_function(input_layers='vis',
                                                   output_layers='lab',
                                                   path=[('vis', 'hid'),
                                                         ('hid', 'ass'),
                                                         ('ass', 'lab')],
                                                   smooth=True).compile()

        encode_label = OneHotEncoding(n_classes=10)

        # Step 1: Train the first layer, plotting the weights and persistent chain state.
        for i, (n_samples, visible_data, label_data) in enumerate(
                dataset.training_set.minibatch_iterator(
                    minibatch_size=minibatch_size,
                    epochs=n_training_epochs_1,
                    single_channel=True)):
            train_first_layer(visible_data)
            if i % check_period == 0:
                print 'Free Energy of Test Data: %s' % (
                    free_energy_of_first_layer(dataset.test_set.input).mean())
                if plot:
                    dbplot(
                        dbn.bridges['vis', 'hid'].w.get_value().T.reshape(
                            (-1, 28, 28)), 'weights')
                    dbplot(
                        train_first_layer.locals()['sleep_visible'][0].reshape(
                            (-1, 28, 28)), 'vis_sleep_state')

        # Step 2: Train the second layer and simultaneously compute the classification error from forward passes.
        for i, (n_samples, visible_data, label_data) in enumerate(
                dataset.training_set.minibatch_iterator(
                    minibatch_size=minibatch_size,
                    epochs=n_training_epochs_2,
                    single_channel=True)):
            train_second_layer(visible_data, encode_label(label_data))
            if i % check_period == 0:
                out, = predict_label(dataset.test_set.input)
                score = percent_argmax_correct(actual=out,
                                               target=dataset.test_set.target)
                print 'Classification Score: %s' % score
                if plot:
                    dbplot(
                        dbn.bridges['vis', 'hid'].w.get_value().T.reshape(
                            (-1, 28, 28)), 'w_vis_hid')
                    dbplot(dbn.bridges['hid', 'ass'].w.get_value(),
                           'w_hid_ass')
                    dbplot(dbn.bridges['lab', 'ass'].w.get_value(),
                           'w_lab_ass')
                    dbplot(
                        train_second_layer.locals()['sleep_visible']
                        [0].reshape((-1, 20, 25)), 'hidden_state')
Example #16
    def get_training_fcn(self, n_gibbs=1, persistent=False, optimizer=SimpleGradientDescent(eta=0.01)):
        pass
Example #17
def demo_simple_dbn(minibatch_size=10,
                    n_training_epochs_1=5,
                    n_training_epochs_2=50,
                    n_hidden_1=500,
                    n_hidden_2=10,
                    plot_period=100,
                    eta1=0.01,
                    eta2=0.0001,
                    w_init_mag_1=0.01,
                    w_init_mag_2=0.5,
                    seed=None):
    """
    Train a DBN, and create a function to project the test data into a latent space

    :param minibatch_size:
    :param n_training_epochs_1: Number of training epochs for the first-level RBM
    :param n_training_epochs_2: Number of training epochs for the second-level RBM
    :param n_hidden_1: Number of hidden units for first RBM
    :param n_hidden_2: Number of hidden units for the second RBM
    :param plot_period: How often to plot
    :param seed:
    :return:
    """

    dataset = get_mnist_dataset(flat=True)
    rng = np.random.RandomState(seed)
    w_init_1 = lambda shape: w_init_mag_1 * rng.randn(*shape)
    w_init_2 = lambda shape: w_init_mag_2 * rng.randn(*shape)

    if is_test_mode():
        n_training_epochs_1 = 0.01
        n_training_epochs_2 = 0.01

    # Train the first RBM
    dbn1 = StackedDeepBeliefNet(rbms=[
        BernoulliBernoulliRBM.from_initializer(
            n_visible=784, n_hidden=n_hidden_1, w_init_fcn=w_init_1)
    ])
    train_first_layer = dbn1.get_training_fcn(
        optimizer=SimpleGradientDescent(eta=eta1), n_gibbs=1,
        persistent=True).compile()
    sample_first_layer = dbn1.get_sampling_fcn(
        initial_vis=dataset.training_set.input[:minibatch_size],
        n_steps=10).compile()
    for i, vis_data in enumerate(
            minibatch_iterate(dataset.training_set.input,
                              minibatch_size=minibatch_size,
                              n_epochs=n_training_epochs_1)):
        if i % plot_period == plot_period - 1:
            dbplot(dbn1.rbms[0].w.get_value().T[:100].reshape([-1, 28, 28]),
                   'weights1')
            dbplot(sample_first_layer()[0].reshape(-1, 28, 28), 'samples1')
        train_first_layer(vis_data)

    # Train the second RBM
    dbn2 = dbn1.stack_another(rbm=BernoulliGaussianRBM.from_initializer(
        n_visible=n_hidden_1, n_hidden=n_hidden_2, w_init_fcn=w_init_2))
    train_second_layer = dbn2.get_training_fcn(
        optimizer=SimpleGradientDescent(eta=eta2), n_gibbs=1,
        persistent=True).compile()
    sample_second_layer = dbn2.get_sampling_fcn(
        initial_vis=dataset.training_set.input[:minibatch_size],
        n_steps=10).compile()
    for i, vis_data in enumerate(
            minibatch_iterate(dataset.training_set.input,
                              minibatch_size=minibatch_size,
                              n_epochs=n_training_epochs_2)):
        if i % plot_period == 0:
            dbplot(dbn2.rbms[1].w.get_value(), 'weights2')
            dbplot(sample_second_layer()[0].reshape(-1, 28, 28), 'samples2')
        train_second_layer(vis_data)

    # Project data to latent space.
    project_to_latent = dbn2.propup.compile(fixed_args=dict(stochastic=False))
    latent_test_data = project_to_latent(dataset.test_set.input)
    print 'Projected the test data to a latent space.  Shape: %s' % (
        latent_test_data.shape, )

    decode = dbn2.propdown.compile(fixed_args=dict(stochastic=False))
    recon_test_data = decode(latent_test_data)
    print 'Reconstructed the test data.  Shape: %s' % (recon_test_data.shape, )
Example #18
    def get_constrastive_divergence_function(self, visible_layers, hidden_layers, input_layers = None, up_path = None, n_gibbs = 1, persistent = False,
            method = 'free_energy', optimizer = SimpleGradientDescent(eta = 0.1)):
        """
        Make a symbolic function that does one step of contrastive divergence given a minibatch of input data.
        :param visible_layers: The visible layers of the RBM to be trained
        :param hidden_layers: The hidden layers of the RBM to be trained
        :param input_layers: The input layers (if not the same as the visible), whose activations will have to be passed
            up to the visible layers before training.
        :param up_path: The path from the input_layers to the hidden_layers (in the future this should be found
            automatically - now it is only computed automatically if there's a direct connection from input to visible)
        :param n_gibbs: Number of Gibbs block sampling steps to do
        :param persistent: True for persistent contrastive divergence (pCD), False for regular CD
        :param optimizer: An IGradientOptimizer object.
        :return: A symbolic function of update form:
            [(param_0, new_param_0), ..., (persistent_state_0, new_persistent_state_0), ...] = func(in_0, in_1, ...)
            that updates the parameters of the specified RBM, and the persistent state if persistent=True.
        """

        visible_layers = visible_layers if isinstance(visible_layers, (list, tuple)) else (visible_layers, )
        hidden_layers = hidden_layers if isinstance(hidden_layers, (list, tuple)) else (hidden_layers, )
        if input_layers is None:
            assert set(visible_layers).issubset(self._graph.get_input_variables()), "If you don't specify input layers, "\
                "the visible layers must be inputs to the graph.  But they are not.  Visible layers: %s, Input layers: %s" \
                % (visible_layers, self._graph.get_input_variables().keys())

        elif up_path is None:
            up_path = self.get_inference_function(input_layers = input_layers, output_layers = visible_layers)
        else:
            up_path = self._graph.get_execution_path(up_path)

        propup = self.get_inference_function(visible_layers, hidden_layers)
        free_energy = self.get_free_energy_function(visible_layers, hidden_layers)

        @symbolic_updater
        def cd_function(*input_signals):

            wake_visible = input_signals if input_layers is None else up_path(*input_signals)
            wake_hidden = propup(*wake_visible)

            initial_hidden =[theano.shared(np.zeros(wh.tag.test_value.shape, dtype = theano.config.floatX), name = 'persistent_hidden_state') for wh in wake_hidden] \
                if persistent else wake_hidden

            gibbs_path = [(hidden_layers, visible_layers)] + [(visible_layers, hidden_layers), (hidden_layers, visible_layers)] * (n_gibbs-1)
            sleep_visible = self.get_inference_function(hidden_layers, visible_layers, gibbs_path)(*initial_hidden)
            sleep_hidden = propup(*sleep_visible)

            all_params = sum([x.parameters for x in ([self.layers[i] for i in visible_layers]
                +[self.layers[i] for i in hidden_layers]+[self.bridges[i, j] for i in visible_layers for j in hidden_layers])], [])

            if method == 'free_energy':
                cost = free_energy(*wake_visible).mean() - free_energy(*sleep_visible).mean()
            elif method == 'energy':
                cost = tt.mean(wake_visible.T.dot(wake_hidden) - sleep_visible.T.dot(sleep_hidden))
            else:
                bad_value(method)

            optimizer(cost = cost, parameters = all_params, constants = wake_visible+sleep_visible)

            if persistent:
                for p, s in zip(initial_hidden, sleep_hidden):
                    add_update(p, s)

        return cd_function
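For a concrete call site, Example #15 above invokes this method on a DeepBeliefNet; a condensed usage sketch with the same arguments:

# Usage sketch, condensed from Example #15.
train_rbm = dbn.get_constrastive_divergence_function(
    visible_layers='vis', hidden_layers='hid',
    optimizer=SimpleGradientDescent(eta=0.01),
    n_gibbs=1, persistent=True).compile()
train_rbm(visible_minibatch)  # one persistent-CD update given a minibatch of 'vis' data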
Example #19
                                           optimizer_constructor=lambda:
                                           GradientDescent(eta=0.0001),
                                           n_epochs=30,
                                           minibatch_size=1),
    description="RELU DTP with just 1 sample per minibatch.",
    conclusion=
    "kaBOOM.  Unless you really lower the learning rate down to 0.0001.  In which case it's ok. "
    "Reached 94.17% in 6.73 epochs, which is when I lost patience.")

register_experiment(
    name='all-norm-relu-dtp',
    function=lambda: demo_run_dtp_on_mnist(
        input_activation='safenorm-relu',
        hidden_activation='safenorm-relu',
        output_activation='safenorm-relu',
        optimizer_constructor=lambda: SimpleGradientDescent(eta=0.1),
        normalize_inputs=True,
    ),
    description="Now try with normalized-relu units",
    conclusion=
    "Works, kind of, gets to like 93.5%.  Most hidden units seem to die.  At least it doesn't explode."
)

register_experiment(
    name='all-softplus-dtp',
    function=lambda: demo_run_dtp_on_mnist(
        input_activation='softplus',
        hidden_activation='softplus',
        output_activation='softplus',
        optimizer_constructor=lambda: SimpleGradientDescent(eta=0.01),
    ),