Example 1
def train_conventional_mlp_on_mnist(hidden_sizes,
                                    n_epochs=50,
                                    w_init='xavier-both',
                                    minibatch_size=20,
                                    rng=1234,
                                    optimizer='sgd',
                                    hidden_activations='relu',
                                    output_activation='softmax',
                                    learning_rate=0.01,
                                    cost_function='nll',
                                    use_bias=True,
                                    l1_loss=0,
                                    l2_loss=0,
                                    test_on='training+test'):

    dataset = get_mnist_dataset(flat=True)

    if output_activation != 'softmax':
        dataset = dataset.to_onehot()

    all_layer_sizes = [dataset.input_size] + hidden_sizes + [dataset.n_categories]
    weights = initialize_network_params(layer_sizes=all_layer_sizes,
                                        mag=w_init,
                                        base_dist='normal',
                                        include_biases=False,
                                        rng=rng)
    net = MultiLayerPerceptron(weights=weights,
                               hidden_activation=hidden_activations,
                               output_activation=output_activation,
                               use_bias=use_bias)
    predictor = GradientBasedPredictor(
        function=net,
        cost_function=get_named_cost_function(cost_function),
        optimizer=get_named_optimizer(optimizer, learning_rate=learning_rate),
        regularization_cost=lambda params: sum(
            l1_loss * abs(p_).sum() + l2_loss * (p_**2).sum() if p_.ndim == 2 else 0
            for p_ in params)).compile()
    assess_online_predictor(predictor=predictor,
                            dataset=dataset,
                            evaluation_function='percent_argmax_correct',
                            test_epochs=range(0, n_epochs, 1),
                            test_on=test_on,
                            minibatch_size=minibatch_size)
    ws = [p.get_value() for p in net.parameters]
    return ws
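
A minimal usage sketch for the function above (argument values are illustrative, and it assumes get_mnist_dataset and the other helpers are importable in this codebase):

if __name__ == '__main__':
    # Train a 784-200-10 MLP for a few epochs and inspect the learned weight shapes.
    learned_weights = train_conventional_mlp_on_mnist(hidden_sizes=[200], n_epochs=5)
    print([w.shape for w in learned_weights])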
Example 2
def demo_mnist_online_regression(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        regressor_type = 'multinomial',
        n_epochs = 20,
        n_test_points = 30,
        max_training_samples = None,
        include_biases = True,
        ):
    """
    Train an online regressor on MNIST and print the test scores as training progresses.
    """

    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30, flat = True)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples, flat = True)

    assert regressor_type in ('multinomial', 'logistic', 'linear')

    n_outputs = dataset.n_categories
    if regressor_type in ('logistic', 'linear'):
        dataset = dataset.to_onehot()

    predictor = OnlineRegressor(
        input_size = dataset.input_size,
        output_size = n_outputs,
        regressor_type = regressor_type,
        optimizer=get_named_optimizer(name = optimizer, learning_rate=learning_rate),
        include_biases = include_biases
        ).compile()

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size
    )

    plot_learning_curves(results)
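
A usage sketch for this demo (illustrative hyperparameters; assumes the plotting backend used by plot_learning_curves is available):

if __name__ == '__main__':
    # Train a multinomial (softmax) regressor on MNIST with plain SGD.
    demo_mnist_online_regression(regressor_type='multinomial', n_epochs=10, learning_rate=0.1)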
Example 4
 def __init__(self,
              ws,
              bs=None,
              comp_weight=1e-6,
              optimizer=None,
              layerwise_scales=False,
              parametrization='log',
              hidden_activations='relu',
              output_activation='softmax',
              rng=None):
     """
     Learns how to rescale the units to be an optimal rounding network.
     :param ws: A list of (n_in, n_out) weight matrices
     :param bs: A list of bias vectors (same length as ws)
     :param comp_weight: The weight (lambda in the paper) given to computation
     :param optimizer: The optimizer (an IGradientOptimizer object)
     :param layerwise_scales: Make scales layerwise (as opposed to unitwise)
     :param parametrization: What space to parametrize in ('log', 'direct', or 'softplus')
     :param hidden_activations: Hidden activation functions (as a string, eg 'relu')
     :param output_activation: Output activation function
     :param rng: Random number generator or seed.
     """
     if optimizer is None:
         optimizer = get_named_optimizer('sgd', 0.01)
     if bs is None:
         bs = [np.zeros(w.shape[1]) for w in ws]
     self.ws = [create_shared_variable(w) for w in ws]
     self.bs = [create_shared_variable(b) for b in bs]
     self.comp_weight = tt.constant(comp_weight, dtype=theano.config.floatX)
     self.optimizer = optimizer
     self.hidden_activations = hidden_activations
     self.output_activation = output_activation
     scale_dims = [()] * len(ws) if layerwise_scales else [
         ws[0].shape[0]
     ] + [w.shape[1] for w in ws[:-1]]
     self.k_params = \
         [create_shared_variable(np.ones(d)) for d in scale_dims] if parametrization=='direct' else \
         [create_shared_variable(np.zeros(d)) for d in scale_dims] if parametrization=='log' else \
         [create_shared_variable(np.zeros(d)+np.exp(1)-1) for d in scale_dims] if parametrization=='softplus' else \
         bad_value(parametrization)
     self.parametrization = parametrization
     self.rng = get_theano_rng(rng)
Example 5
def get_predictor(predictor_type, input_size, target_size, hidden_sizes = [240], output_activation = 'sigm',
        hidden_activation = 'tanh', optimizer = 'adamax', learning_rate = 0.01, noise = 1, w_init=0.01, rng = None):
    """
    Specify parameters that will allow you to construct a predictor

    :param predictor_type: String identifying the predictor class (see below)
    :param input_size: Integer size of the input vector
    :param target_size: Integer size of the target vector
    :param hidden_sizes: List of hidden layer sizes
    :param hidden_activation: Name of the hidden activation function (e.g. 'tanh')
    :param optimizer: Name of the optimizer (e.g. 'adamax')
    :param learning_rate: Learning rate for the optimizer
    :return: A compiled predictor
    """
    return {
        'MLP': lambda: GradientBasedPredictor(
            function = MultiLayerPerceptron.from_init(
                layer_sizes = [input_size] + hidden_sizes + [target_size],
                hidden_activation=hidden_activation,
                output_activation=output_activation,
                w_init = w_init,
                rng = rng
                ),
            cost_function = mean_squared_error,
            optimizer = get_named_optimizer(optimizer, learning_rate),
            ).compile(),
        'DTP': lambda: DifferenceTargetMLP.from_initializer(
            input_size = input_size,
            output_size = target_size,
            hidden_sizes = hidden_sizes,
            optimizer_constructor = lambda: get_named_optimizer(optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init_mag=w_init,
            noise = noise,
            rng = rng,
            ).compile(),
        'PreAct-DTP': lambda: DifferenceTargetMLP.from_initializer(
            input_size = input_size,
            output_size = target_size,
            hidden_sizes = hidden_sizes,
            optimizer_constructor = lambda: get_named_optimizer(optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init_mag=w_init,
            noise = noise,
            layer_constructor = PreActivationDifferenceTargetLayer.from_initializer,
            rng = rng,
            ).compile(),
        'Linear-DTP': lambda: LinearDifferenceTargetMLP.from_initializer(
            input_size = input_size,
            output_size = target_size,
            hidden_sizes = hidden_sizes,
            optimizer_constructor = lambda: get_named_optimizer(optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation='linear',
            w_init_mag=w_init,
            noise = noise,
            rng = rng,
            # layer_constructor = LinearDifferenceTargetLayer.from_initializer
            ).compile(),
        }[predictor_type]()
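
A usage sketch for get_predictor (the sizes are hypothetical; it assumes the MLP and DTP classes referenced above are importable):

if __name__ == '__main__':
    # Build a compiled difference-target-propagation predictor for 784-d inputs and 10-d targets.
    predictor = get_predictor('DTP', input_size=784, target_size=10, hidden_sizes=[240])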
Example 6
def demo_optimize_conv_scales(n_epochs=5,
                              comp_weight=1e-11,
                              learning_rate=0.1,
                              error_loss='KL',
                              use_softmax=True,
                              optimizer='sgd',
                              shuffle_training=False):
    """
    Run the scale optimization routine on a convnet.
    :param n_epochs: Number of training epochs
    :param comp_weight: Weight given to the computational cost term
    :param learning_rate: Learning rate for the optimizer
    :param error_loss: Name of the error loss (e.g. 'KL')
    :param use_softmax: Whether the network ends in a softmax ('prob') layer
    :param optimizer: Name of the optimizer (e.g. 'sgd')
    :param shuffle_training: Whether to shuffle the training videos
    :return: A dict of results (scales, costs, outputs and accuracy records)
    """
    if error_loss == 'KL' and not use_softmax:
        raise Exception(
            "It's very strange that you want to use a KL divergence on something other than a softmax error.  I assume you've made a mistake."
        )

    training_videos, training_vgg_inputs = get_vgg_video_splice(
        ['ILSVRC2015_train_00033010', 'ILSVRC2015_train_00336001'],
        shuffle=shuffle_training,
        shuffling_rng=1234)
    test_videos, test_vgg_inputs = get_vgg_video_splice(
        ['ILSVRC2015_train_00033009', 'ILSVRC2015_train_00033007'])

    set_dbplot_figure_size(12, 6)

    n_frames_to_show = 10
    display_frames = np.arange(
        len(test_videos) // n_frames_to_show // 2, len(test_videos),
        len(test_videos) // n_frames_to_show)
    ax1 = dbplot(np.concatenate(test_videos[display_frames], axis=1),
                 "Test Videos",
                 title='',
                 plot_type='pic')
    plt.subplots_adjust(wspace=0, hspace=.05)
    ax1.set_xticks(224 * np.arange(len(display_frames) // 2) * 2 + 224 // 2)
    ax1.tick_params(labelbottom='on')

    layers = get_vgg_layer_specifiers(
        up_to_layer='prob' if use_softmax else 'fc8')

    # Setup the true VGGnet and get the outputs
    f_true = ConvNet.from_init(layers, input_shape=(3, 224, 224)).compile()
    true_test_out = flatten2(
        np.concatenate([
            f_true(frame_positions[None])
            for frame_positions in test_vgg_inputs
        ]))
    top5_true_guesses = argtopk(true_test_out, 5)
    true_guesses = np.argmax(true_test_out, axis=1)
    true_labels = [
        get_vgg_label_at(g, short=True)
        for g in true_guesses[display_frames[::2]]
    ]
    full_convnet_cost = np.array([
        get_full_convnet_computational_cost(layer_specs=layers,
                                            input_shape=(3, 224, 224))
    ] * len(test_videos))

    # Setup the approximate networks
    slrc_net = ScaleLearningRoundingConvnet.from_convnet_specs(
        layers,
        optimizer=get_named_optimizer(optimizer, learning_rate=learning_rate),
        corruption_type='rand',
        rng=1234)
    f_train_slrc = slrc_net.train_scales.partial(
        comp_weight=comp_weight, error_loss=error_loss).compile()
    f_get_scales = slrc_net.get_scales.compile()
    round_fp = RoundConvNetForwardPass(layers)
    sigmadelta_fp = SigmaDeltaConvNetForwardPass(layers,
                                                 input_shape=(3, 224, 224))

    p = ProgressIndicator(n_epochs * len(training_videos))

    output_dir = make_dir(get_local_path('output/%T-convnet-spikes'))

    for input_minibatch, minibatch_info in minibatch_iterate_info(
            training_vgg_inputs,
            n_epochs=n_epochs,
            minibatch_size=1,
            test_epochs=np.arange(0, n_epochs, 0.1)):

        if minibatch_info.test_now:
            with EZProfiler('test'):
                current_scales = f_get_scales()
                round_cost, round_out = round_fp.get_cost_and_output(
                    test_vgg_inputs, scales=current_scales)
                sd_cost, sd_out = sigmadelta_fp.get_cost_and_output(
                    test_vgg_inputs, scales=current_scales)
                round_guesses, round_top1_correct, round_top5_correct = get_and_report_scores(
                    round_cost,
                    round_out,
                    name='Round',
                    true_top_1=true_guesses,
                    true_top_k=top5_true_guesses)
                sd_guesses, sd_top1_correct, sd_top5_correct = get_and_report_scores(
                    sd_cost,
                    sd_out,
                    name='SigmaDelta',
                    true_top_1=true_guesses,
                    true_top_k=top5_true_guesses)

                round_labels = [
                    get_vgg_label_at(g, short=True)
                    for g in round_guesses[display_frames[::2]]
                ]

                ax1.set_xticklabels([
                    '{}\n{}'.format(tg, rg)
                    for tg, rg in izip_equal(true_labels, round_labels)
                ])

                ax = dbplot(
                    np.array([
                        round_cost / 1e9, sd_cost / 1e9,
                        full_convnet_cost / 1e9
                    ]).T,
                    'Computation',
                    plot_type='thick-line',
                    ylabel='GOps',
                    title='',
                    legend=['Round', r'$\Sigma\Delta$', 'Original'],
                )
                ax.set_xticklabels([])
                plt.grid()
                dbplot(
                    100 * np.array(
                        [cummean(sd_top1_correct),
                         cummean(sd_top5_correct)]).T,
                    "Score",
                    plot_type=lambda: LinePlot(
                        y_bounds=(0, 100),
                        plot_kwargs=[
                            dict(linewidth=3, color='k'),
                            dict(linewidth=3, color='k', linestyle=':')
                        ]),
                    title='',
                    legend=[
                        r'Round/$\Sigma\Delta$ Top-1',
                        r'Round/$\Sigma\Delta$ Top-5'
                    ],
                    ylabel='Cumulative\nPercent Accuracy',
                    xlabel='Frame #',
                    layout='v',
                )
                plt.grid()
            plt.savefig(
                os.path.join(output_dir,
                             'epoch-%.3g.pdf' % (minibatch_info.epoch, )))
        f_train_slrc(input_minibatch)
        p()
        print "Epoch {:3.2f}: Scales: {}".format(
            minibatch_info.epoch, ['%.3g' % float(s) for s in f_get_scales()])

    results = dict(current_scales=current_scales,
                   round_cost=round_cost,
                   round_out=round_out,
                   sd_cost=sd_cost,
                   sd_out=sd_out,
                   round_guesses=round_guesses,
                   round_top1_correct=round_top1_correct,
                   round_top5_correct=round_top5_correct,
                   sd_guesses=sd_guesses,
                   sd_top1_correct=sd_top1_correct,
                   sd_top5_correct=sd_top5_correct)

    dbplot_hang()
    return results
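
A usage sketch for this demo (it assumes the ILSVRC video splices referenced above are available locally; the settings mirror the function's defaults):

if __name__ == '__main__':
    results = demo_optimize_conv_scales(n_epochs=5, comp_weight=1e-11, learning_rate=0.1)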
Example 7
def demo_mnist_mlp(
        minibatch_size = 10,
        learning_rate = 0.1,
        optimizer = 'sgd',
        hidden_sizes = [300],
        w_init = 0.01,
        hidden_activation = 'tanh',
        output_activation = 'softmax',
        cost = 'nll-d',
        visualize_params = False,
        n_test_points = 30,
        n_epochs = 10,
        max_training_samples = None,
        use_bias = True,
        onehot = False,
        rng = 1234,
        plot = False,
        ):
    """
    Train an MLP on MNIST and print the test scores as training progresses.
    """

    if is_test_mode():
        n_test_points = 3
        minibatch_size = 5
        n_epochs = 0.01
        dataset = get_mnist_dataset(n_training_samples=30, n_test_samples=30)
    else:
        dataset = get_mnist_dataset(n_training_samples=max_training_samples)

    if onehot:
        dataset = dataset.to_onehot()

    if minibatch_size == 'full':
        minibatch_size = dataset.training_set.n_samples

    optimizer = get_named_optimizer(name = optimizer, learning_rate=learning_rate)

    # Setup the training and test functions
    predictor = GradientBasedPredictor(
        function = MultiLayerPerceptron.from_init(
            layer_sizes=[dataset.input_size]+hidden_sizes+[10],
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init = w_init,
            use_bias=use_bias,
            rng = rng,
            ),
        cost_function=cost,
        optimizer=optimizer
        ).compile()  # .compile() turns the GradientBasedPredictor, which works with symbolic variables, into a real one that takes and returns arrays.

    def vis_callback(xx):
        p = predictor.symbolic_predictor._function
        in_layer = {
            'Layer[0].w': p.layers[0].linear_transform._w.get_value().T.reshape(-1, 28, 28),
            'Layer[0].b': p.layers[0].linear_transform._b.get_value(),
            }
        other_layers = [{'Layer[%s].w' % (i+1): l.linear_transform._w.get_value(), 'Layer[%s].b' % (i+1): l.linear_transform._b.get_value()} for i, l in enumerate(p.layers[1:])]
        dbplot(dict(list(in_layer.items()) + sum([list(o.items()) for o in other_layers], [])))

    # Train and periodically report the test score.
    results = assess_online_predictor(
        dataset=dataset,
        predictor=predictor,
        evaluation_function='percent_argmax_correct',
        test_epochs=sqrtspace(0, n_epochs, n_test_points),
        minibatch_size=minibatch_size,
        test_callback=vis_callback if visualize_params else None
    )

    if plot:
        plot_learning_curves(results)
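
A usage sketch for this demo (illustrative hyperparameters):

if __name__ == '__main__':
    # Train a 784-300-10 MLP with tanh hidden units and plot the learning curve.
    demo_mnist_mlp(hidden_sizes=[300], hidden_activation='tanh', n_epochs=10, plot=True)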
Example 8
def get_predictor(predictor_type,
                  input_size,
                  target_size,
                  hidden_sizes=[240],
                  output_activation='sigm',
                  hidden_activation='tanh',
                  optimizer='adamax',
                  learning_rate=0.01,
                  noise=1,
                  w_init=0.01,
                  use_bias=True,
                  rng=None):
    """
    Specify parameters that will allow you to construct a predictor

    :param predictor_type: String identifying the predictor class (see below)
    :param input_size: Integer size of the input vector
    :param target_size: Integer size of the target vector
    :param hidden_sizes: List of hidden layer sizes
    :param hidden_activation: Name of the hidden activation function (e.g. 'tanh')
    :param optimizer: Name of the optimizer (e.g. 'adamax')
    :param learning_rate: Learning rate for the optimizer
    :param use_bias: Whether to include bias terms
    :return: A compiled predictor
    """
    return {
        'MLP':
        lambda: GradientBasedPredictor(
            function=MultiLayerPerceptron.from_init(
                layer_sizes=[input_size] + hidden_sizes + [target_size],
                hidden_activation=hidden_activation,
                output_activation=output_activation,
                use_bias=use_bias,
                w_init=w_init,
                rng=rng),
            cost_function=mean_squared_error,
            optimizer=get_named_optimizer(optimizer, learning_rate),
        ).compile(),
        'DTP':
        lambda: DifferenceTargetMLP.from_initializer(
            input_size=input_size,
            output_size=target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=lambda: get_named_optimizer(
                optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init_mag=w_init,
            noise=noise,
            rng=rng,
            use_bias=use_bias,
        ).compile(),
        'PreAct-DTP':
        lambda: DifferenceTargetMLP.from_initializer(
            input_size=input_size,
            output_size=target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=lambda: get_named_optimizer(
                optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation=output_activation,
            w_init_mag=w_init,
            noise=noise,
            layer_constructor=PreActivationDifferenceTargetLayer.from_initializer,
            rng=rng,
            use_bias=use_bias,
        ).compile(),
        'Linear-DTP':
        lambda: LinearDifferenceTargetMLP.from_initializer(
            input_size=input_size,
            output_size=target_size,
            hidden_sizes=hidden_sizes,
            optimizer_constructor=lambda: get_named_optimizer(
                optimizer, learning_rate),
            # input_activation=input_activation,
            hidden_activation=hidden_activation,
            output_activation='linear',
            w_init_mag=w_init,
            noise=noise,
            rng=rng,
            use_bias=use_bias,
            # layer_constructor = LinearDifferenceTargetLayer.from_initializer
        ).compile(),
    }[predictor_type]()
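
This variant of get_predictor additionally exposes use_bias. A usage sketch (hypothetical sizes; the MLP type is chosen for illustration):

if __name__ == '__main__':
    # Build a bias-free MLP predictor as a baseline alongside the DTP variants.
    predictor = get_predictor('MLP', input_size=784, target_size=10, use_bias=False)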