training_stopper])

# epoch_logger.subscribe_to('validation misclassification',
#                                validation_misclassification_monitor)

# Gets called by trainer between training epochs.
validation_callback = ValidationCallback(inputs=[input_indices_symbolic.output_symbol],
                                         input_iterator=mnist_validation_iterator,
                                         epoch_callbacks=[misclassification_rate_monitor])

trainer = Bgfs(inputs=[input_indices_symbolic],
              parameters=params_flat,
              old_parameters=params_old_flat,
              gradient=gradient_symbol,
              learning_rate=.3,
              training_iterator=mnist_training_iterator,
              training_set=indices_training_dataset,
              scalar_loss=scalar_loss_symbol,
              epoch_callbacks=[validation_callback,  # measure validation misclassification rate, quit if it stops falling
                               LimitsNumEpochs(1000),
                               EpochTimer()])  # perform no more than 100 epochs

start_time = time.time()
trainer.train()
elapsed_time = time.time() - start_time

plt.plot(_classification_errors)
plt.show()

print("The time elapsed for training is ", elapsed_time)
validation_iter = validation_set.iterator(iterator_type="sequential", batch_size=100)

# Gets called by trainer between training epochs.
validation_callback = ValidationCallback(
    inputs=[image_node.output_symbol, label_node.output_symbol],
    input_iterator=validation_iter,
    monitors=[misclassification_rate_monitor],
)

trainer = Bgfs(
    inputs=[image_node, label_node],
    parameters=params_flat,
    old_parameters=params_old_flat,
    gradient=gradient_symbol,
    gradient_at_old_params=gradient_symbol_old_params,
    learning_rate=0.3,
    training_iterator=training_iter,
    monitors=[],
    epoch_callbacks=[
        validation_callback,  # measure validation misclassification rate, quit if it stops falling
        LimitsNumEpochs(100),
    ],
)  # perform no more than 100 epochs

start_time = time.time()
_classification_errors = trainer.train()
print _classification_errors
elapsed_time = time.time() - start_time

plt.plot(_classification_errors)
plt.show()
Ejemplo n.º 3
0
def main():
    '''
    Entry point of this script.
    '''

    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/convolutional_network/:
    #   convolutional_network.ipynb

    filter_counts = [64, 64]
    filter_init_uniform_ranges = [.05] * len(filter_counts)
    filter_shapes = [(5, 5), (5, 5)]
    pool_shapes = [(4, 4), (4, 4)]
    pool_strides = [(2, 2), (2, 2)]
    affine_output_sizes = [10]
    affine_init_stddevs = [.05] * len(affine_output_sizes)
    dropout_include_rates = ([.5 if args.dropout else 1.0] *
                             (len(filter_counts) + len(affine_output_sizes)))

    assert_equal(affine_output_sizes[-1], 10)

    mnist_training, mnist_testing = load_mnist()

    if args.validation_size == 0:
        # use testing set as validation set
        mnist_validation = mnist_testing
    else:
        # split training set into training and validation sets
        tensors = mnist_training.tensors
        training_tensors = [t[:-args.validation_size, ...] for t in tensors]
        validation_tensors = [t[-args.validation_size:, ...] for t in tensors]

        if args.shuffle_dataset == True:
            def shuffle_in_unison_inplace(a, b):
                assert len(a) == len(b)
                p = numpy.random.permutation(len(a))
                return a[p], b[p]

            [training_tensors[0],training_tensors[1]] = shuffle_in_unison_inplace(training_tensors[0],training_tensors[1])
            [validation_tensors[0], validation_tensors[1]] = shuffle_in_unison_inplace(validation_tensors[0], validation_tensors[1])


        mnist_training = Dataset(tensors=training_tensors,
                                 names=mnist_training.names,
                                 formats=mnist_training.formats)
        mnist_validation = Dataset(tensors=validation_tensors,
                                   names=mnist_training.names,
                                   formats=mnist_training.formats)

    mnist_validation_iterator = mnist_validation.iterator(
        iterator_type='sequential',
        loop_style='divisible',
        batch_size=args.batch_size)

    mnist_training_iterator = mnist_training.iterator(
        iterator_type='sequential',
        loop_style='divisible',
        batch_size=args.batch_size)

    image_uint8_node, label_node = mnist_validation_iterator.make_input_nodes()
    image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(129734)
    theano_rng = RandomStreams(2387845)

    (conv_layers,
     affine_layers,
     output_node,
     params_flat,
     params_old_flat) = build_conv_classifier(image_node,
                                          filter_shapes,
                                          filter_counts,
                                          filter_init_uniform_ranges,
                                          pool_shapes,
                                          pool_strides,
                                          affine_output_sizes,
                                          affine_init_stddevs,
                                          dropout_include_rates,
                                          rng,
                                          theano_rng)

    loss_node = CrossEntropy(output_node, label_node)
    scalar_loss = loss_node.output_symbol.mean()
#    scalar_loss2 = theano.clone(scalar_loss, replace = {params_flat: params_old_flat})

    if args.weight_decay != 0.0:
        for conv_layer in conv_layers:
            filters = conv_layer.conv2d_node.filters
            filter_loss = args.weight_decay * theano.tensor.sqr(filters).sum()
            scalar_loss = scalar_loss + filter_loss

        for affine_layer in affine_layers:
            weights = affine_layer.affine_node.linear_node.params
            weight_loss = args.weight_decay * theano.tensor.sqr(weights).sum()
            scalar_loss = scalar_loss + weight_loss

    max_epochs = 500

    #
    # Makes parameter updater
    #

    gradient = theano.gradient.grad(scalar_loss, params_flat)

    loss_function = theano.function([image_uint8_node.output_symbol, label_node.output_symbol],scalar_loss)
    gradient_function = theano.function([image_uint8_node.output_symbol, label_node.output_symbol],gradient)

    cost_arguments = mnist_training_iterator.next()
    print(loss_function(*cost_arguments))
    grads = gradient_function(*cost_arguments)
    print(grads)
    print(grads.shape)

    #
    # Makes batch and epoch callbacks
    #
    def make_output_filename(args, best=False):
            '''
            Constructs a filename that reflects the command-line params.
            '''
            assert_equal(os.path.splitext(args.output_prefix)[1], "")

            if os.path.isdir(args.output_prefix):
                output_dir, output_prefix = args.output_prefix, ""
            else:
                output_dir, output_prefix = os.path.split(args.output_prefix)
                assert_true(os.path.isdir(output_dir))

            if output_prefix != "":
                output_prefix = output_prefix + "_"

            output_prefix = os.path.join(output_dir, output_prefix)

            return ("%slr-%g_mom-%g_nesterov-%s_bs-%d%s.pkl" %
                    (output_prefix,
                     args.learning_rate,
                     args.initial_momentum,
                     not args.no_nesterov,
                     args.batch_size,
                     "_best" if best else ""))


    # Set up the loggers
    epoch_logger = EpochLogger(make_output_filename(args) + "_log.h5")
    misclassification_node = Misclassification(output_node, label_node)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to('validation mean loss', validation_loss_monitor)

    training_stopper = StopsOnStagnation(max_epochs=100,
                                             min_proportional_decrease=0.0)
    validation_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                             callbacks=[print_misclassification_rate,
                                                        training_stopper])

    epoch_logger.subscribe_to('validation misclassification',
                                validation_misclassification_monitor)

    # batch callback (monitor)
    #training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node,
                                          callbacks=[print_loss])
    epoch_logger.subscribe_to("training loss", training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                                       callbacks=[])
    epoch_logger.subscribe_to('training misclassification %',
                              training_misclassification_monitor)

    epoch_timer = EpochTimer()
#    epoch_logger.subscribe_to('epoch time',
 #                             epoch_timer)
    #################


    model = SerializableModel([image_uint8_node], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node,
                                            callbacks=[saves_best])
    epoch_logger.subscribe_to("Validation Loss", validation_loss_monitor)

    validation_callback = ValidationCallback(
        inputs=[image_uint8_node.output_symbol, label_node.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor,
                         validation_misclassification_monitor])

    # trainer = Sgd((image_node.output_symbol, label_node.output_symbol),
    trainer = Bgfs(inputs=[image_node, label_node],
                  parameters=params_flat,
                  old_parameters=params_old_flat,
                  gradient=gradient,
                  learning_rate=args.learning_rate,
                  training_iterator=mnist_training_iterator,
                  training_set = mnist_training,
                  scalar_loss=scalar_loss,
                  epoch_callbacks=([
                             #training_loss_monitor,
                             # training_misclassification_monitor,
                              validation_callback,
                              LimitsNumEpochs(max_epochs),
                              EpochTimer()]))

    '''
    stuff_to_pickle = OrderedDict(
        (('model', model),
         ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    trainer.epoch_callbacks += (momentum_updaters +
                                [EpochTimer(),
                                 PicklesOnEpoch(stuff_to_pickle,
                                                make_output_filename(args),
                                                overwrite=False),
                                 validation_callback,
                                 LimitsNumEpochs(max_epochs)])
    '''

    start_time = time.time()
    trainer.train()
    elapsed_time = time.time() - start_time

    print("Total elapsed time is for training is: ", elapsed_time)