Example #1
    def get_optimized_images(float_image):

        optimized_images = input_float_node.output_format.make_batch(
            is_symbolic=False,
            batch_size=10)

        for i in xrange(model.output_nodes[0].output_format.shape[1]):
            print("optimizing image w.r.t. '%d' label" % i)
            param_updater = SgdParameterUpdater(
                shared_input_float,
                loss_symbol,
                learning_rate=args.learning_rate,
                momentum=args.momentum,
                use_nesterov=args.nesterov)

            sgd = Sgd(input_nodes=[],
                      input_iterator=DummyIterator(),
                      callbacks=[param_updater,
                                 LimitsNumEpochs(args.max_iterations)])

            shared_input_float.set_value(float_image)
            shared_label.set_value(numpy.asarray([i],
                                                 dtype=shared_label.dtype))
            sgd.train()

            optimized_images[i, ...] = shared_input_float.get_value()[0, ...]

        return optimized_images
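
Note: the snippet above relies on names defined elsewhere in its script (model, input_float_node, shared_input_float, loss_symbol, DummyIterator). The underlying idea, gradient descent on the input image rather than on the weights, can be sketched in plain Theano; build_class_score below is a hypothetical stand-in for the trained network's class-score expression, not part of the original code.

import numpy
import theano
import theano.tensor as T

# Hypothetical: build_class_score returns a scalar Theano expression for the
# target class's score, as a function of the shared image below.
shared_image = theano.shared(numpy.zeros((1, 28, 28), dtype=theano.config.floatX))
class_score = build_class_score(shared_image)
loss = -class_score  # minimizing -score maximizes the class score

gradient = T.grad(loss, shared_image)
step = theano.function(inputs=[],
                       outputs=loss,
                       updates=[(shared_image, shared_image - 0.01 * gradient)])

for _ in range(100):  # crude fixed iteration budget instead of LimitsNumEpochs
    step()

optimized_image = shared_image.get_value()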
Example #2
def test_mean_over_epoch():

    rng = numpy.random.RandomState(3851)

    vectors = rng.uniform(-1.0, 1.0, size=(12, 10))
    fmt = DenseFormat(axes=('b', 'f'), shape=(-1, 10), dtype=vectors.dtype)
    dataset = Dataset(names=['vectors'], formats=[fmt], tensors=[vectors])
    iterator = dataset.iterator('sequential',
                                batch_size=2,
                                loop_style="divisible")

    input_node = iterator.make_input_nodes()[0]
    l2_norm_node = L2Norm(input_node)

    num_averages_compared = [0]

    def compare_with_expected_average(values, _):  # ignore format argument
        assert_equal(len(values), 1)
        average = values[0]

        assert_is_instance(fmt, DenseFormat)
        l2_norms = numpy.sqrt((vectors ** 2.0).sum(fmt.axes.index('f')))
        expected_average = l2_norms.sum() / l2_norms.size

        assert_allclose(average, expected_average)
        num_averages_compared[0] += 1

    average_monitor = MeanOverEpoch(l2_norm_node,
                                    [compare_with_expected_average])

    class DatasetRandomizer(EpochCallback):
        '''
        Fills the dataset with a fresh set of random values after each epoch.
        '''

        def on_start_training(self):
            pass

        def on_epoch(self):
            vectors[...] = rng.uniform(-1.0, 1.0, size=vectors.shape)

    trainer = Sgd([input_node],
                  iterator,
                  callbacks=[average_monitor,
                             LimitsNumEpochs(3),
                             DatasetRandomizer()])

    trainer.train()

    assert_equal(num_averages_compared[0], 3)
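
DatasetRandomizer above shows the EpochCallback surface this test relies on: on_start_training() and on_epoch(). As a sketch in the same style (the base class may impose more than these two methods; that is an assumption here), another minimal callback:

class PrintsEpochCount(EpochCallback):
    '''
    Minimal EpochCallback sketch: prints how many epochs have completed.
    '''

    def on_start_training(self):
        self.num_epochs_seen = 0

    def on_epoch(self):
        self.num_epochs_seen += 1
        print("completed epoch %d" % self.num_epochs_seen)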
Example #3
def test_limit_param_norms():
    '''
    A unit test for limit_param_norms().

    Optimizes a simple function f = ||W - x||, with a limit on W's norms.

    Initial value of W is 0, and ||x|| is bigger than W's max norm. Therefore,
    we expect the final value of W to be x, rescaled to have norm max_norm.
    '''

    floatX = theano.config.floatX

    def make_single_example_dataset(norm, shape, rng):
        '''
        Returns a Dataset with a single datum with a given L2 norm.

        Parameters
        ----------
        norm: float
          The L2 norm that the flattened datum should have.

        shape: Sequence
          The shape of the datum.

        rng: numpy.random.RandomState
          The RNG used to fill the datum with random values.

        Returns
        -------
        rval: Dataset
        '''
        axes = ('b', ) + tuple(str(i) for i in range(len(shape)))
        fmt = DenseFormat(axes=axes,
                          shape=(-1, ) + shape,
                          dtype=floatX)
        data = fmt.make_batch(batch_size=1, is_symbolic=False)
        data[...] = rng.uniform(low=-1.0, high=1.0, size=data.shape)

        sum_axes = tuple(range(1, len(shape) + 1))

        # Scale all data so that L2 norms = norm
        norms = numpy.sqrt((data ** 2.0).sum(axis=sum_axes, keepdims=True))
        scales = norm / (norms + .00001)
        data *= scales

        return Dataset(tensors=[data],
                       formats=[fmt],
                       names=['data'])

    def make_costs_node(input_node, weights):
        '''
        Returns a Node that computes the squared distance between input_node
        and weights.
        '''
        assert_is_instance(input_node, Node)
        flat_shape = (input_node.output_symbol.shape[0], -1)

        input_vectors = input_node.output_symbol.reshape(flat_shape)
        weight_vectors = weights.reshape((weights.shape[0], -1))

        diff = input_vectors - weight_vectors
        costs = T.sqr(diff).sum(axis=1)

        return Node([input_node],
                    costs,
                    DenseFormat(axes=['b'], shape=[-1], dtype=weights.dtype))

    dataset_norm = .3
    max_norm = .2
    learning_rate = .001
    rng = numpy.random.RandomState(325)

    def print_cost(monitored_value, fmt):
        print("avg cost: %s" % monitored_value)

    def print_weight_norm(monitored_values, fmt):
        assert_equal(len(monitored_values), 1)
        weights = monitored_values[0]
        norm = numpy.sqrt((weights.get_value() ** 2.0).sum())
        print("weights' norm: %s" % norm)

    for shape in ((2, ), (2, 3, 4)):
        dataset = make_single_example_dataset(dataset_norm, shape, rng)

        weights = theano.shared(numpy.zeros((1, ) + shape, dtype=floatX))

        training_iterator = dataset.iterator(iterator_type='sequential',
                                             batch_size=1)
        input_nodes = training_iterator.make_input_nodes()
        assert_equal(len(input_nodes), 1)

        costs_node = make_costs_node(input_nodes[0], weights)
        gradients = theano.gradient.grad(costs_node.output_symbol.mean(),
                                         weights)
        param_updater = SgdParameterUpdater(parameter=weights,
                                            gradient=gradients,
                                            learning_rate=learning_rate,
                                            momentum=0.0,
                                            use_nesterov=False)

        input_axes = tuple(range(1, len(shape) + 1))
        limit_param_norms(param_updater, weights, max_norm, input_axes)

        stops_on_stagnation = StopsOnStagnation(max_epochs=10)
        average_cost_monitor = MeanOverEpoch(costs_node,
                                             callbacks=[stops_on_stagnation])

        sgd = Sgd(inputs=input_nodes,
                  input_iterator=training_iterator,
                  callbacks=[param_updater, average_cost_monitor])
        sgd.train()

        weight_norm = numpy.sqrt((weights.get_value() ** 2.0).sum())
        assert_almost_equal(weight_norm, max_norm, decimal=6)

        # an optional sanity-check to confirm that the weights are on a
        # straight line between their initial value (0.0) and the data.
        normed_weights = weights.get_value() / weight_norm
        normed_data = dataset.tensors[0] / dataset_norm
        assert_allclose(normed_weights,
                        normed_data,
                        rtol=learning_rate * 10)
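
The fixed point the assertions above check for follows from the max-norm constraint: after each SGD step, limit_param_norms rescales W whenever its L2 norm over the given axes exceeds max_norm, so the optimum ends up being x shrunk onto the norm ball. A numpy sketch of that projection (the exact implementation inside limit_param_norms may differ, e.g. in its epsilon guard):

import numpy

def project_onto_norm_ball(weights, max_norm, sum_axes, epsilon=1e-7):
    # Rescale so the L2 norm over sum_axes never exceeds max_norm;
    # weights already inside the ball are left untouched.
    norms = numpy.sqrt((weights ** 2.0).sum(axis=sum_axes, keepdims=True))
    scales = numpy.minimum(1.0, max_norm / (norms + epsilon))
    return weights * scales

rng = numpy.random.RandomState(0)
weights = rng.uniform(-1.0, 1.0, size=(1, 2, 3, 4))
clipped = project_onto_norm_ball(weights, max_norm=0.2, sum_axes=(1, 2, 3))
assert numpy.sqrt((clipped ** 2.0).sum(axis=(1, 2, 3))).max() <= 0.2 + 1e-6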
Example #4
# Measures the average misclassification rate over some dataset
misclassification_rate_monitor = AverageMonitor(misclassification_node.output_symbol,
                                                misclassification_node.output_format,
                                                callbacks=[print_misclassification_rate,
                                                           saves_best,
                                                           training_stopper])

validation_iter = validation_set.iterator(iterator_type='sequential', batch_size=100)

# Gets called by trainer between training epochs.
validation_callback = ValidationCallback(inputs=[image_node.output_symbol, label_node.output_symbol],
                                         input_iterator=validation_iter,
                                         monitors=[misclassification_rate_monitor])

trainer = Sgd([image_node, label_node],
              training_iter,
              param_symbols,
              param_updaters,
              monitors=[],
              epoch_callbacks=[validation_callback,  # measure validation misclassification rate, quit if it stops falling
                               LimitsNumEpochs(100)])  # perform no more than 100 epochs

start_time = time.time()

trainer.train()
elapsed_time = time.time() - start_time

print "The time elapsed for training is ", elapsed_time

Example #5
def main():
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/multilayer_perceptron/:
    #   multilayer_perceptron.ipynb
    #   mlp_tutorial_part_3.yaml

    sizes = [500, 500, 10]
    sparse_init_counts = [15, 15]
    assert_equal(len(sparse_init_counts), len(sizes) - 1)

    assert_equal(sizes[-1], 10)

    mnist_training, mnist_testing = load_mnist()

    # split training set into training and validation sets
    tensors = mnist_training.tensors
    training_tensors = [t[:-args.validation_size, ...] for t in tensors]
    validation_tensors = [t[-args.validation_size:, ...] for t in tensors]

    if not args.no_shuffle_dataset:

        def shuffle_in_unison_inplace(a, b):
            assert len(a) == len(b)
            p = numpy.random.permutation(len(a))
            return a[p], b[p]

        training_tensors[0], training_tensors[1] = shuffle_in_unison_inplace(
            training_tensors[0], training_tensors[1])
        validation_tensors[0], validation_tensors[1] = shuffle_in_unison_inplace(
            validation_tensors[0], validation_tensors[1])

    all_images_shared = theano.shared(numpy.vstack([training_tensors[0], validation_tensors[0]]))
    all_labels_shared = theano.shared(numpy.concatenate([training_tensors[1], validation_tensors[1]]))

    length_training = training_tensors[0].shape[0]
    length_validation = validation_tensors[0].shape[0]
    indices_training = numpy.asarray(range(length_training))
    indices_validation = numpy.asarray(range(length_training, length_training + length_validation))
    indices_training_dataset = Dataset(
        tensors=[indices_training], names=["indices"], formats=[DenseFormat(axes=["b"], shape=[-1], dtype="int64")]
    )
    indices_validation_dataset = Dataset(
        tensors=[indices_validation], names=["indices"], formats=[DenseFormat(axes=["b"], shape=[-1], dtype="int64")]
    )
    indices_training_iterator = indices_training_dataset.iterator(
        iterator_type="sequential", batch_size=args.batch_size
    )
    indices_validation_iterator = indices_validation_dataset.iterator(iterator_type="sequential", batch_size=10000)

    mnist_validation_iterator = indices_validation_iterator
    mnist_training_iterator = indices_training_iterator

    input_indices_symbolic, = indices_training_iterator.make_input_nodes()
    image_lookup_node = ImageLookeupNode(input_indices_symbolic, all_images_shared)
    label_lookup_node = LabelLookeupNode(input_indices_symbolic, all_labels_shared)

    image_node = CastNode(image_lookup_node, "floatX")
    # image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(34523)
    theano_rng = RandomStreams(23845)

    (affine_nodes, output_node) = build_fc_classifier(
        image_node, sizes, sparse_init_counts, args.dropout_include_rates, rng, theano_rng
    )

    loss_node = CrossEntropy(output_node, label_lookup_node)
    loss_sum = loss_node.output_symbol.mean()
    max_epochs = 200

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []
    for affine_node in affine_nodes:
        for params in (affine_node.linear_node.params, affine_node.bias_node.params):
            parameters.append(params)
            gradients = theano.gradient.grad(loss_sum, params)
            parameter_updater = SgdParameterUpdater(
                params, gradients, args.learning_rate, args.initial_momentum, args.nesterov
            )
            parameter_updaters.append(parameter_updater)

            momentum_updaters.append(
                LinearlyInterpolatesOverEpochs(
                    parameter_updater.momentum, args.final_momentum, args.epochs_to_momentum_saturation
                )
            )

    #
    # Makes batch and epoch callbacks
    #

    """
    def make_output_basename(args):
        assert_equal(os.path.splitext(args.output_prefix)[1], "")
        if os.path.isdir(args.output_prefix) and \
           not args.output_prefix.endswith('/'):
            args.output_prefix += '/'

        output_dir, output_prefix = os.path.split(args.output_prefix)
        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return "{}lr-{}_mom-{}_nesterov-{}_bs-{}".format(
            output_prefix,
            args.learning_rate,
            args.initial_momentum,
            args.nesterov,
            args.batch_size)
    """

    assert_equal(os.path.splitext(args.output_prefix)[1], "")
    if os.path.isdir(args.output_prefix) and not args.output_prefix.endswith("/"):
        args.output_prefix += "/"

    output_dir, output_prefix = os.path.split(args.output_prefix)
    if output_prefix != "":
        output_prefix = output_prefix + "_"

    output_prefix = os.path.join(output_dir, output_prefix)

    epoch_logger = EpochLogger(output_prefix + "SGD_nesterov.h5")

    # misclassification_node = Misclassification(output_node, label_node)
    # mcr_logger = LogsToLists()
    # training_stopper = StopsOnStagnation(max_epochs=10,
    #                                      min_proportional_decrease=0.0)

    misclassification_node = Misclassification(output_node, label_lookup_node)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to("validation mean loss", validation_loss_monitor)

    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node, callbacks=[print_mcr, StopsOnStagnation(max_epochs=20, min_proportional_decrease=0.0)]
    )

    epoch_logger.subscribe_to("validation misclassification", validation_misclassification_monitor)

    # batch callback (monitor)
    # training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node, callbacks=[print_loss])
    epoch_logger.subscribe_to("training mean loss", training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node, callbacks=[])
    epoch_logger.subscribe_to("training misclassification %", training_misclassification_monitor)

    # epoch callbacks
    # validation_loss_logger = LogsToLists()

    def make_output_filename(args, best=False):
        basename = make_output_basename(args)
        return "{}{}.pkl".format(basename, "_best" if best else "")

    # model = SerializableModel([input_indices_symbolic], [output_node])
    # saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])

    epoch_logger.subscribe_to("validation loss", validation_loss_monitor)

    epoch_timer = EpochTimer2()
    epoch_logger.subscribe_to("epoch duration", epoch_timer)

    validation_callback = ValidationCallback(
        inputs=[input_indices_symbolic.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor, validation_misclassification_monitor],
    )

    trainer = Sgd(
        [input_indices_symbolic],
        mnist_training_iterator,
        callbacks=(
            parameter_updaters
            + momentum_updaters
            + [  # training_loss_monitor,
                # training_misclassification_monitor,
                validation_callback,
                LimitsNumEpochs(max_epochs),
                epoch_timer,
            ]
        ),
    )
    # validation_loss_monitor]))

    # stuff_to_pickle = OrderedDict(
    #     (('model', model),
    #      ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    # trainer.epoch_callbacks += (momentum_updaters +
    #                             [PicklesOnEpoch(stuff_to_pickle,
    #                                             make_output_filename(args),
    #                                             overwrite=False),
    #                              validation_callback,
    #                              LimitsNumEpochs(max_epochs)])

    start_time = time.time()
    trainer.train()
    elapsed_time = time.time() - start_time
    print("Total elapsed time is for training is: ", elapsed_time)
Example #6
def main():
    '''
    Entry point of this script.
    '''

    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/convolutional_network/:
    #   convolutional_network.ipynb

    filter_counts = [96, 192, 192]
    filter_init_uniform_ranges = [0.005] * len(filter_counts)
    filter_shapes = [(8, 8), (8, 8), (5, 5)]
    pool_shapes = [(4, 4), (4, 4), (2, 2)]
    pool_strides = [(2, 2), (2, 2), (2, 2)]
    pool_pads = [(2, 2), (2, 2), (2, 2)]
    affine_output_sizes = [10]
    affine_init_stddevs = [.05] * len(affine_output_sizes)
    dropout_include_rates = [0.8, 0.5, 0.5, 0.5]
    # dropout_include_rates = ([.8 if args.dropout else 1.0] *
    #                          (len(filter_counts) + len(affine_output_sizes)))
    conv_pads = [(4, 4), (3, 3), (3, 3)]

    assert_equal(affine_output_sizes[-1], 10)

    def unpickle(file_path):
        import cPickle
        with open(file_path, 'rb') as pickle_file:
            return cPickle.load(pickle_file)

    batch1 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_1')
    batch2 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_2')
    batch3 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_3')
    batch4 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_4')
    batch5 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_5')

    training_tensors = [
        numpy.concatenate((batch1['data'].reshape(10000, 3, 32, 32),
                           batch2['data'].reshape(10000, 3, 32, 32),
                           batch3['data'].reshape(10000, 3, 32, 32),
                           batch4['data'].reshape(10000, 3, 32, 32))),
        numpy.concatenate((batch1['labels'], batch2['labels'],
                           batch3['labels'], batch4['labels']))]
    validation_tensors = [batch5['data'].reshape(10000, 3, 32, 32),
                          numpy.asarray(batch5['labels'])]

    if not args.no_shuffle_dataset:
        def shuffle_in_unison_inplace(a, b):
            assert len(a) == len(b)
            p = numpy.random.permutation(len(a))
            return a[p], b[p]

        training_tensors[0], training_tensors[1] = shuffle_in_unison_inplace(
            training_tensors[0], training_tensors[1])
        validation_tensors[0], validation_tensors[1] = shuffle_in_unison_inplace(
            validation_tensors[0], validation_tensors[1])

    all_images_shared = theano.shared(
        numpy.vstack([training_tensors[0], validation_tensors[0]]))
    all_labels_shared = theano.shared(
        numpy.concatenate([training_tensors[1], validation_tensors[1]]))

    length_training = training_tensors[0].shape[0]
    length_validation = validation_tensors[0].shape[0]
    indices_training = numpy.asarray(range(length_training))
    indices_validation = numpy.asarray(range(length_training,
                                             length_training + length_validation))
    indices_training_dataset = Dataset(
        tensors=[indices_training],
        names=['indices'],
        formats=[DenseFormat(axes=['b'], shape=[-1], dtype='int64')])
    indices_validation_dataset = Dataset(
        tensors=[indices_validation],
        names=['indices'],
        formats=[DenseFormat(axes=['b'], shape=[-1], dtype='int64')])
    indices_training_iterator = indices_training_dataset.iterator(
        iterator_type='sequential', batch_size=args.batch_size)
    indices_validation_iterator = indices_validation_dataset.iterator(
        iterator_type='sequential', batch_size=args.batch_size)

    mnist_validation_iterator = indices_validation_iterator
    mnist_training_iterator = indices_training_iterator

    input_indices_symbolic, = indices_training_iterator.make_input_nodes()
    image_lookup_node = ImageLookeupNode(input_indices_symbolic, all_images_shared)
    label_lookup_node = LabelLookeupNode(input_indices_symbolic, all_labels_shared)

    image_node = RescaleImage(image_lookup_node)
    image_node = Lcn(image_node)

    rng = numpy.random.RandomState(129734)
    theano_rng = RandomStreams(2387845)

    (conv_layers,
     affine_layers,
     output_node) = build_conv_classifier(image_node,
                                          filter_shapes,
                                          filter_counts,
                                          filter_init_uniform_ranges,
                                          pool_shapes,
                                          pool_strides,
                                          affine_output_sizes,
                                          affine_init_stddevs,
                                          dropout_include_rates,
                                          conv_pads,
                                          rng,
                                          theano_rng)

    loss_node = CrossEntropy(output_node, label_lookup_node)
    scalar_loss = loss_node.output_symbol.mean()

    if args.weight_decay != 0.0:
        for conv_layer in conv_layers:
            filters = conv_layer.conv2d_node.filters
            filter_loss = args.weight_decay * theano.tensor.sqr(filters).sum()
            scalar_loss = scalar_loss + filter_loss

        for affine_layer in affine_layers:
            weights = affine_layer.affine_node.linear_node.params
            weight_loss = args.weight_decay * theano.tensor.sqr(weights).sum()
            scalar_loss = scalar_loss + weight_loss

    max_epochs = 201

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []

    def add_updaters(parameter,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters):
        '''
        Adds a ParameterUpdater to parameter_updaters, and a
        LinearlyInterpolatesOverEpochs to momentum_updaters.
        '''
        gradient = theano.gradient.grad(scalar_loss, parameter)
        parameter_updaters.append(SgdParameterUpdater(parameter,
                                                      gradient,
                                                      args.learning_rate,
                                                      args.initial_momentum,
                                                      args.nesterov))
        momentum_updaters.append(LinearlyInterpolatesOverEpochs(
            parameter_updaters[-1].momentum,
            args.final_momentum,
            args.epochs_to_momentum_saturation))

    for conv_layer in conv_layers:
        filters = conv_layer.conv2d_node.filters
        parameters.append(filters)
        add_updaters(filters,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

        if args.max_filter_norm != numpy.inf:
            limit_param_norms(parameter_updaters[-1],
                              filters,
                              args.max_filter_norm,
                              (1, 2, 3))

        bias = conv_layer.bias_node.params
        parameters.append(bias)
        add_updaters(bias,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

    for affine_layer in affine_layers:
        weights = affine_layer.affine_node.linear_node.params
        parameters.append(weights)
        add_updaters(weights,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)
        if args.max_col_norm != numpy.inf:
            limit_param_norms(parameter_updater=parameter_updaters[-1],
                              param=weights,
                              max_norm=args.max_col_norm,
                              input_axes=[0])

        biases = affine_layer.affine_node.bias_node.params
        parameters.append(biases)
        add_updaters(biases,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

    #
    # Makes batch and epoch callbacks
    #
    def make_output_filename(args, best=False):
        '''
        Constructs a filename that reflects the command-line params.
        '''
        assert_equal(os.path.splitext(args.output_prefix)[1], "")

        if os.path.isdir(args.output_prefix):
            output_dir, output_prefix = args.output_prefix, ""
        else:
            output_dir, output_prefix = os.path.split(args.output_prefix)
            assert_true(os.path.isdir(output_dir))

        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return ("%slr-%g_mom-%g_nesterov-%s_bs-%d%s.pkl" %
                (output_prefix,
                 args.learning_rate,
                 args.initial_momentum,
                 args.nesterov,
                 args.batch_size,
                 "_best" if best else ""))


    # Set up the loggers
    epoch_logger = EpochLogger(make_output_filename(args) + "_log.h5")
    misclassification_node = Misclassification(output_node, label_lookup_node)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to('validation mean loss', validation_loss_monitor)

    training_stopper = StopsOnStagnation(max_epochs=201,
                                         min_proportional_decrease=0.0)
    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node,
        callbacks=[print_misclassification_rate, training_stopper])

    epoch_logger.subscribe_to('validation misclassification',
                              validation_misclassification_monitor)

    # batch callback (monitor)
    #training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node,
                                          callbacks=[print_loss])
    epoch_logger.subscribe_to("training loss", training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                                       callbacks=[])
    epoch_logger.subscribe_to('training misclassification %',
                              training_misclassification_monitor)

    epoch_timer = EpochTimer2()
    epoch_logger.subscribe_to('epoch duration', epoch_timer)


    model = SerializableModel([input_indices_symbolic], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node,
                                            callbacks=[saves_best])
    epoch_logger.subscribe_to("Validation Loss", validation_loss_monitor)

    validation_callback = ValidationCallback(
        inputs=[input_indices_symbolic.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor,
                         validation_misclassification_monitor])

    loss_function = theano.function([input_indices_symbolic.output_symbol], scalar_loss)
    cost_args = mnist_validation_iterator.next()
    print(loss_function(*cost_args))

    # trainer = Sgd((image_node.output_symbol, label_node.output_symbol),
    trainer = Sgd([input_indices_symbolic],
                  mnist_training_iterator,
                  callbacks=(parameter_updaters + [training_loss_monitor,
                              training_misclassification_monitor,
                              validation_callback]))

    '''
    stuff_to_pickle = OrderedDict(
        (('model', model),
         ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    trainer.epoch_callbacks += (momentum_updaters +
                                [EpochTimer(),
                                 PicklesOnEpoch(stuff_to_pickle,
                                                make_output_filename(args),
                                                overwrite=False),
                                 validation_callback,
                                 LimitsNumEpochs(max_epochs)])
    '''
    trainer.epoch_callbacks += (momentum_updaters +
                                [LimitsNumEpochs(max_epochs),
                                 epoch_timer])

    start_time = time.time()
    trainer.train()
    elapsed_time = time.time() - start_time

    print("Total elapsed time is for training is: ", elapsed_time)
Example #7
def main():
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/multilayer_perceptron/:
    #   multilayer_perceptron.ipynb
    #   mlp_tutorial_part_3.yaml

    sizes = [500, 500, 10]
    sparse_init_counts = [15, 15]
    assert_equal(len(sparse_init_counts), len(sizes) - 1)

    assert_equal(sizes[-1], 10)

    mnist_training, mnist_testing = load_mnist()

    if args.validation_size == 0:
        # use testing set as validation set
        mnist_validation = mnist_testing
    else:
        # split training set into training and validation sets
        tensors = mnist_training.tensors
        training_tensors = [t[:-args.validation_size, ...] for t in tensors]
        validation_tensors = [t[-args.validation_size:, ...] for t in tensors]
        mnist_training = Dataset(tensors=training_tensors,
                                 names=mnist_training.names,
                                 formats=mnist_training.formats)
        mnist_validation = Dataset(tensors=validation_tensors,
                                   names=mnist_training.names,
                                   formats=mnist_training.formats)

    mnist_validation_iterator = mnist_validation.iterator(
        iterator_type='sequential',
        batch_size=args.batch_size)
    image_uint8_node, label_node = mnist_validation_iterator.make_input_nodes()
    image_node = CastNode(image_uint8_node, 'floatX')
    # image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(34523)
    theano_rng = RandomStreams(23845)

    (affine_nodes,
     output_node) = build_fc_classifier(image_node,
                                        sizes,
                                        sparse_init_counts,
                                        args.dropout_include_rates,
                                        rng,
                                        theano_rng)

    loss_node = CrossEntropy(output_node, label_node)
    loss_sum = loss_node.output_symbol.mean()
    max_epochs = 10000

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []
    for affine_node in affine_nodes:
        for params in (affine_node.linear_node.params,
                       affine_node.bias_node.params):
            parameters.append(params)
            gradients = theano.gradient.grad(loss_sum, params)
            parameter_updater = SgdParameterUpdater(params,
                                                    gradients,
                                                    args.learning_rate,
                                                    args.initial_momentum,
                                                    args.nesterov)
            parameter_updaters.append(parameter_updater)

            momentum_updaters.append(LinearlyInterpolatesOverEpochs(
                parameter_updater.momentum,
                args.final_momentum,
                args.epochs_to_momentum_saturation))

    #
    # Makes batch and epoch callbacks
    #

    def make_output_basename(args):
        assert_equal(os.path.splitext(args.output_prefix)[1], "")
        if os.path.isdir(args.output_prefix) and \
           not args.output_prefix.endswith('/'):
            args.output_prefix += '/'

        output_dir, output_prefix = os.path.split(args.output_prefix)
        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return "{}lr-{}_mom-{}_nesterov-{}_bs-{}".format(
            output_prefix,
            args.learning_rate,
            args.initial_momentum,
            args.nesterov,
            args.batch_size)

    epoch_logger = EpochLogger(make_output_basename(args) + "_log.h5")

    # misclassification_node = Misclassification(output_node, label_node)
    # mcr_logger = LogsToLists()
    # training_stopper = StopsOnStagnation(max_epochs=10,
    #                                      min_proportional_decrease=0.0)
    misclassification_node = Misclassification(output_node, label_node)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to('validation mean loss', validation_loss_monitor)

    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node,
        callbacks=[print_mcr,
                   StopsOnStagnation(max_epochs=10,
                                     min_proportional_decrease=0.0)])

    epoch_logger.subscribe_to('validation misclassification',
                              validation_misclassification_monitor)

    # batch callback (monitor)
    # training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node, callbacks=[print_loss])
    epoch_logger.subscribe_to('training mean loss', training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                                       callbacks=[])
    epoch_logger.subscribe_to('training misclassification %',
                              training_misclassification_monitor)

    # epoch callbacks
    # validation_loss_logger = LogsToLists()


    def make_output_filename(args, best=False):
        basename = make_output_basename(args)
        return "{}{}.pkl".format(basename, '_best' if best else "")

    model = SerializableModel([image_uint8_node], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(
        loss_node,
        callbacks=[saves_best])

    epoch_logger.subscribe_to('validation loss', validation_loss_monitor)

    validation_callback = ValidationCallback(
        inputs=[image_uint8_node.output_symbol, label_node.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor,
                         validation_misclassification_monitor])

    trainer = Sgd([image_uint8_node, label_node],
                  mnist_training.iterator(iterator_type='sequential',
                                          batch_size=args.batch_size),
                  callbacks=(parameter_updaters +
                             momentum_updaters +
                             [training_loss_monitor,
                              training_misclassification_monitor,
                              validation_callback,
                              LimitsNumEpochs(max_epochs)]))
                                                   # validation_loss_monitor]))

    # stuff_to_pickle = OrderedDict(
    #     (('model', model),
    #      ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    # trainer.epoch_callbacks += (momentum_updaters +
    #                             [PicklesOnEpoch(stuff_to_pickle,
    #                                             make_output_filename(args),
    #                                             overwrite=False),
    #                              validation_callback,
    #                              LimitsNumEpochs(max_epochs)])

    trainer.train()
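
sparse_init_counts = [15, 15] follows the Pylearn2 tutorial's sparse initialization, where each hidden unit starts with a fixed number of nonzero incoming weights. A numpy sketch of that scheme; the scale of the nonzero values here is an assumption, not necessarily what build_fc_classifier uses:

import numpy

def sparse_initialize(num_inputs, num_units, num_nonzero, rng, stddev=1.0):
    # Each column (one hidden unit) gets `num_nonzero` randomly placed
    # Gaussian weights; all other entries stay exactly zero.
    weights = numpy.zeros((num_inputs, num_units))
    for unit in range(num_units):
        rows = rng.choice(num_inputs, size=num_nonzero, replace=False)
        weights[rows, unit] = rng.randn(num_nonzero) * stddev
    return weights

rng = numpy.random.RandomState(0)
W = sparse_initialize(784, 500, num_nonzero=15, rng=rng)
assert (W != 0).sum(axis=0).max() <= 15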
Example #8
def main():
    '''
    Entry point of this script.
    '''

    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/convolutional_network/:
    #   convolutional_network.ipynb

    filter_counts = [96, 192, 192]
    filter_init_uniform_ranges = [0.005] * len(filter_counts)
    filter_shapes = [(8, 8), (8, 8), (5, 5)]
    pool_shapes = [(4, 4), (4, 4), (2, 2)]
    pool_strides = [(2, 2), (2, 2), (2, 2)]
    pool_pads = [(2, 2), (2, 2), (2, 2)]
    affine_output_sizes = [10]
    affine_init_stddevs = [.005] * len(affine_output_sizes)
    dropout_include_rates = [0.8, 0.5, 0.5, 0.5]
    # dropout_include_rates = ([.8 if args.dropout else 1.0] *
    #                          (len(filter_counts) + len(affine_output_sizes)))
    conv_pads = [(4, 4), (3, 3), (3, 3)]

    assert_equal(affine_output_sizes[-1], 10)

    def unpickle(file_path):
        import cPickle
        with open(file_path, 'rb') as pickle_file:
            return cPickle.load(pickle_file)

    batch1 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_1')
    batch2 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_2')
    batch3 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_3')
    batch4 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_4')
    batch5 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_5')

    training_tensors = [
        numpy.concatenate((batch1['data'].reshape(10000, 3, 32, 32),
                           batch2['data'].reshape(10000, 3, 32, 32),
                           batch3['data'].reshape(10000, 3, 32, 32),
                           batch4['data'].reshape(10000, 3, 32, 32))),
        numpy.concatenate((batch1['labels'], batch2['labels'],
                           batch3['labels'], batch4['labels']))]
    validation_tensors = [batch5['data'].reshape(10000, 3, 32, 32),
                          numpy.asarray(batch5['labels'])]

    shuffle_dataset = True
    if shuffle_dataset:
        def shuffle_in_unison_inplace(a, b):
            assert len(a) == len(b)
            p = numpy.random.permutation(len(a))
            return a[p], b[p]

        training_tensors[0], training_tensors[1] = shuffle_in_unison_inplace(
            training_tensors[0], training_tensors[1])
        validation_tensors[0], validation_tensors[1] = shuffle_in_unison_inplace(
            validation_tensors[0], validation_tensors[1])

    cifar10_training = Dataset(tensors=training_tensors,
                               names=('images', 'labels'),
                               formats=(DenseFormat(axes=('b', 'c', '0', '1'),
                                                    shape=(-1, 3, 32, 32),
                                                    dtype='uint8'),
                                        DenseFormat(axes=('b',),
                                                    shape=(-1,),
                                                    dtype='int64')))
    cifar10_validation = Dataset(tensors=validation_tensors,
                                 names=('images', 'labels'),
                                 formats=(DenseFormat(axes=('b', 'c', '0', '1'),
                                                      shape=(-1, 3, 32, 32),
                                                      dtype='uint8'),
                                          DenseFormat(axes=('b',),
                                                      shape=(-1,),
                                                      dtype='int64')))

    cifar10_validation_iterator = cifar10_validation.iterator(
        iterator_type='sequential',
        loop_style='divisible',
        batch_size=args.batch_size)
    image_uint8_node, label_node = cifar10_validation_iterator.make_input_nodes()
    image_node = RescaleImage(image_uint8_node)
    image_node_lcn = Lcn(image_node)

    rng = numpy.random.RandomState(3447523)
    theano_rng = RandomStreams(2387345)

    (conv_layers,
     affine_layers,
     output_node) = build_conv_classifier(image_node_lcn,
                                          filter_shapes,
                                          filter_counts,
                                          filter_init_uniform_ranges,
                                          pool_shapes,
                                          pool_strides,
                                          pool_pads,
                                          affine_output_sizes,
                                          affine_init_stddevs,
                                          dropout_include_rates,
                                          conv_pads,
                                          rng,
                                          theano_rng)

    loss_node = CrossEntropy(output_node, label_node)
    scalar_loss = loss_node.output_symbol.mean()

    if args.weight_decay != 0.0:
        for conv_layer in conv_layers:
            filters = conv_layer.conv2d_node.filters
            filter_loss = args.weight_decay * theano.tensor.sqr(filters).sum()
            scalar_loss = scalar_loss + filter_loss

        for affine_layer in affine_layers:
            weights = affine_layer.affine_node.linear_node.params
            weight_loss = args.weight_decay * theano.tensor.sqr(weights).sum()
            scalar_loss = scalar_loss + weight_loss

    max_epochs = 500

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []

    def add_updaters(parameter,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters):
        '''
        Adds a ParameterUpdater to parameter_updaters, and a
        LinearlyInterpolatesOverEpochs to momentum_updaters.
        '''
        gradient = theano.gradient.grad(scalar_loss, parameter)
        parameter_updaters.append(SgdParameterUpdater(parameter,
                                                      gradient,
                                                      args.learning_rate,
                                                      args.initial_momentum,
                                                      not args.no_nesterov))
        momentum_updaters.append(LinearlyInterpolatesOverEpochs(
            parameter_updaters[-1].momentum,
            args.final_momentum,
            args.epochs_to_momentum_saturation))

    for conv_layer in conv_layers:
        filters = conv_layer.conv2d_node.filters
        parameters.append(filters)
        add_updaters(filters,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

        if args.max_filter_norm != numpy.inf:
            limit_param_norms(parameter_updaters[-1],
                              filters,
                              args.max_filter_norm,
                              (1, 2, 3))

        bias = conv_layer.bias_node.params
        parameters.append(bias)
        add_updaters(bias,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

    for affine_layer in affine_layers:
        weights = affine_layer.affine_node.linear_node.params
        parameters.append(weights)
        add_updaters(weights,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)
        if args.max_col_norm != numpy.inf:
            limit_param_norms(parameter_updater=parameter_updaters[-1],
                              param=weights,
                              max_norm=args.max_col_norm,
                              input_axes=[0])

        biases = affine_layer.affine_node.bias_node.params
        parameters.append(biases)
        add_updaters(biases,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

    #
    # Makes batch and epoch callbacks
    #

    def make_misclassification_monitor():
        '''
        Returns a MeanOverEpoch of the misclassification rate.
        '''
        misclassification_node = Misclassification(output_node, label_node)
        mcr_logger = LogsToLists()
        training_stopper = StopsOnStagnation(max_epochs=10,
                                             min_proportional_decrease=0.0)
        return MeanOverEpoch(misclassification_node,
                             callbacks=[print_misclassification_rate,
                                        mcr_logger,
                                        training_stopper])

    mcr_monitor = make_misclassification_monitor()

    # batch callback (monitor)
    training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node,
                                          callbacks=[print_loss,
                                                     training_loss_logger])

    # epoch callbacks
    validation_loss_logger = LogsToLists()

    def make_output_filename(args, best=False):
        '''
        Constructs a filename that reflects the command-line params.
        '''
        assert_equal(os.path.splitext(args.output_prefix)[1], "")

        if os.path.isdir(args.output_prefix):
            output_dir, output_prefix = args.output_prefix, ""
        else:
            output_dir, output_prefix = os.path.split(args.output_prefix)
            assert_true(os.path.isdir(output_dir))

        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return ("%slr-%g_mom-%g_nesterov-%s_bs-%d%s.pkl" %
                (output_prefix,
                 args.learning_rate,
                 args.initial_momentum,
                 not args.no_nesterov,
                 args.batch_size,
                 "_best" if best else ""))

    model = SerializableModel([image_uint8_node], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node,
                                            callbacks=[validation_loss_logger,
                                                       saves_best])

    validation_callback = ValidationCallback(
        inputs=[image_uint8_node.output_symbol, label_node.output_symbol],
        input_iterator=cifar10_validation_iterator,
        epoch_callbacks=[validation_loss_monitor, mcr_monitor])

    # trainer = Sgd((image_node.output_symbol, label_node.output_symbol),
    trainer = Sgd([image_uint8_node, label_node],
                  cifar10_training.iterator(iterator_type='sequential',
                                          loop_style='divisible',
                                          batch_size=args.batch_size),
                  callbacks=(parameter_updaters + [training_loss_monitor]))

    stuff_to_pickle = OrderedDict(
        (('model', model),
         ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    # NOTE: this snippet never defines epoch_timer; an EpochTimer2 instance
    # (as used in the other examples) is assumed here.
    epoch_timer = EpochTimer2()

    trainer.epoch_callbacks += (momentum_updaters +
                                [PicklesOnEpoch(stuff_to_pickle,
                                                make_output_filename(args),
                                                overwrite=False),
                                 validation_callback,
                                 LimitsNumEpochs(max_epochs),
                                 epoch_timer])

    trainer.train()
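
LinearlyInterpolatesOverEpochs ramps each updater's shared momentum value from args.initial_momentum to args.final_momentum over args.epochs_to_momentum_saturation epochs. The schedule is plain linear interpolation; a sketch (whether the library counts epochs from 0 or 1, and the exact clamping, are not assumed here):

def interpolated_momentum(epoch, initial, final, saturation_epochs):
    # Linear ramp from `initial` to `final`, held constant after saturation.
    fraction = min(1.0, float(epoch) / float(saturation_epochs))
    return (1.0 - fraction) * initial + fraction * final

for epoch in (0, 5, 10, 20):
    print(epoch, interpolated_momentum(epoch, 0.5, 0.99, saturation_epochs=10))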
Example #9
def main():
    '''
    Entry point of this script.
    '''

    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/convolutional_network/:
    #   convolutional_network.ipynb

    filter_counts = [64, 64]
    filter_init_uniform_ranges = [.05] * len(filter_counts)
    filter_shapes = [(5, 5), (5, 5)]
    pool_shapes = [(4, 4), (4, 4)]
    pool_strides = [(2, 2), (2, 2)]
    affine_output_sizes = [10]
    affine_init_stddevs = [.05] * len(affine_output_sizes)
    dropout_include_rates = ([.5 if args.dropout else 1.0] *
                             (len(filter_counts) + len(affine_output_sizes)))

    assert_equal(affine_output_sizes[-1], 10)

    mnist_training, mnist_testing = load_mnist()

    if args.validation_size == 0:
        # use testing set as validation set
        mnist_validation = mnist_testing
    else:
        # split training set into training and validation sets
        tensors = mnist_training.tensors
        training_tensors = [t[:-args.validation_size, ...] for t in tensors]
        validation_tensors = [t[-args.validation_size:, ...] for t in tensors]
        mnist_training = Dataset(tensors=training_tensors,
                                 names=mnist_training.names,
                                 formats=mnist_training.formats)
        mnist_validation = Dataset(tensors=validation_tensors,
                                   names=mnist_training.names,
                                   formats=mnist_training.formats)

    mnist_validation_iterator = mnist_validation.iterator(
        iterator_type='sequential',
        loop_style='divisible',
        batch_size=args.batch_size)

    image_uint8_node, label_node = mnist_validation_iterator.make_input_nodes()
    image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(1234)
    theano_rng = RandomStreams(23845)

    (conv_layers,
     affine_layers,
     output_node) = build_conv_classifier(image_node,
                                          filter_shapes,
                                          filter_counts,
                                          filter_init_uniform_ranges,
                                          pool_shapes,
                                          pool_strides,
                                          affine_output_sizes,
                                          affine_init_stddevs,
                                          dropout_include_rates,
                                          rng,
                                          theano_rng)

    loss_node = CrossEntropy(output_node, label_node)
    scalar_loss = loss_node.output_symbol.mean()

    if args.weight_decay != 0.0:
        for conv_layer in conv_layers:
            filters = conv_layer.conv2d_node.filters
            filter_loss = args.weight_decay * theano.tensor.sqr(filters).sum()
            scalar_loss = scalar_loss + filter_loss

        for affine_layer in affine_layers:
            weights = affine_layer.affine_node.linear_node.params
            weight_loss = args.weight_decay * theano.tensor.sqr(weights).sum()
            scalar_loss = scalar_loss + weight_loss

    max_epochs = 500

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []

    def add_updaters(parameter,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters):
        '''
        Adds a ParameterUpdater to parameter_updaters, and a
        LinearlyInterpolatesOverEpochs to momentum_updaters.
        '''
        gradient = theano.gradient.grad(scalar_loss, parameter)
        parameter_updaters.append(SgdParameterUpdater(parameter,
                                                      gradient,
                                                      args.learning_rate,
                                                      args.initial_momentum,
                                                      not args.no_nesterov))
        momentum_updaters.append(LinearlyInterpolatesOverEpochs(
            parameter_updaters[-1].momentum,
            args.final_momentum,
            args.epochs_to_momentum_saturation))

    for conv_layer in conv_layers:
        filters = conv_layer.conv2d_node.filters
        parameters.append(filters)
        add_updaters(filters,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

        if args.max_filter_norm != numpy.inf:
            limit_param_norms(parameter_updaters[-1],
                              filters,
                              args.max_filter_norm,
                              (1, 2, 3))

        bias = conv_layer.bias_node.params
        parameters.append(bias)
        add_updaters(bias,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

    for affine_layer in affine_layers:
        weights = affine_layer.affine_node.linear_node.params
        parameters.append(weights)
        add_updaters(weights,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)
        if args.max_col_norm != numpy.inf:
            limit_param_norms(parameter_updater=parameter_updaters[-1],
                              param=weights,
                              max_norm=args.max_col_norm,
                              input_axes=[0])

        biases = affine_layer.affine_node.bias_node.params
        parameters.append(biases)
        add_updaters(biases,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

    #
    # Makes batch and epoch callbacks
    #

    def make_misclassification_monitor():
        '''
        Returns a MeanOverEpoch of the misclassification rate.
        '''
        misclassification_node = Misclassification(output_node, label_node)
        mcr_logger = LogsToLists()
        training_stopper = StopsOnStagnation(max_epochs=10,
                                             min_proportional_decrease=0.0)
        return MeanOverEpoch(misclassification_node,
                             callbacks=[print_misclassification_rate,
                                        mcr_logger,
                                        training_stopper])

    mcr_monitor = make_misclassification_monitor()

    # batch callback (monitor)
    training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node,
                                          callbacks=[print_loss,
                                                     training_loss_logger])

    # epoch callbacks
    validation_loss_logger = LogsToLists()

    def make_output_filename(args, best=False):
        '''
        Constructs a filename that reflects the command-line params.
        '''
        assert_equal(os.path.splitext(args.output_prefix)[1], "")

        if os.path.isdir(args.output_prefix):
            output_dir, output_prefix = args.output_prefix, ""
        else:
            output_dir, output_prefix = os.path.split(args.output_prefix)
            assert_true(os.path.isdir(output_dir))

        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return ("%slr-%g_mom-%g_nesterov-%s_bs-%d%s.pkl" %
                (output_prefix,
                 args.learning_rate,
                 args.initial_momentum,
                 not args.no_nesterov,
                 args.batch_size,
                 "_best" if best else ""))

    model = SerializableModel([image_uint8_node], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node,
                                            callbacks=[validation_loss_logger,
                                                       saves_best])

    validation_callback = ValidationCallback(
        inputs=[image_uint8_node.output_symbol, label_node.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor, mcr_monitor])

    trainer = Sgd([image_uint8_node, label_node],
                  mnist_training.iterator(iterator_type='sequential',
                                          loop_style='divisible',
                                          batch_size=args.batch_size),
                  callbacks=(parameter_updaters + [training_loss_monitor]))

    stuff_to_pickle = OrderedDict(
        (('model', model),
         ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    trainer.epoch_callbacks += (momentum_updaters +
                                [EpochTimer(),
                                 PicklesOnEpoch(stuff_to_pickle,
                                                make_output_filename(args),
                                                overwrite=False),
                                 validation_callback,
                                 LimitsNumEpochs(max_epochs)])

    trainer.train()


def main():
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/multilayer_perceptron/:
    #   multilayer_perceptron.ipynb
    #   mlp_tutorial_part_3.yaml

    sizes = [500, 500, 10]
    sparse_init_counts = [15, 15]
    assert_equal(len(sparse_init_counts), len(sizes) - 1)

    assert_equal(sizes[-1], 10)

    # Disabled MNIST loading code (this script trains on CIFAR-10 instead):
    '''
    mnist_training, mnist_testing = load_mnist()

    if args.validation_size == 0:
        # use testing set as validation set
        mnist_validation = mnist_testing
    else:
        # split training set into training and validation sets
        tensors = mnist_training.tensors
        size_tensors = tensors[0].shape[0]
        training_tensors = [t[:-args.validation_size, ...] for t in tensors]
        validation_tensors = [t[size_tensors - args.validation_size:, ...]
                              for t in tensors]

        shuffle_dataset = True
        if shuffle_dataset == True:
            def shuffle_in_unison_inplace(a, b):
                assert len(a) == len(b)
                p = numpy.random.permutation(len(a))
                return a[p], b[p]

            training_tensors[0], training_tensors[1] = \
                shuffle_in_unison_inplace(training_tensors[0],
                                          training_tensors[1])
            validation_tensors[0], validation_tensors[1] = \
                shuffle_in_unison_inplace(validation_tensors[0],
                                          validation_tensors[1])
    '''

    def unpickle(filename):
        '''
        Loads a pickled CIFAR-10 batch file and returns its contents as a dict.
        '''
        import cPickle
        with open(filename, 'rb') as pickle_file:
            return cPickle.load(pickle_file)
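
    # Note: the CIFAR-10 batch files are Python 2 pickles.  Under Python 3
    # the equivalent load would use the standard pickle module with
    # byte-string keys, roughly:
    #
    #     import pickle
    #     with open(filename, 'rb') as f:
    #         batch = pickle.load(f, encoding='bytes')
    #     images = batch[b'data'].reshape(-1, 3, 32, 32)
    #     labels = numpy.asarray(batch[b'labels'])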

    batch1 = unpickle('/home/paul/cifar-10-batches-py/data_batch_1')
    batch2 = unpickle('/home/paul/cifar-10-batches-py/data_batch_2')
    batch3 = unpickle('/home/paul/cifar-10-batches-py/data_batch_3')
    batch4 = unpickle('/home/paul/cifar-10-batches-py/data_batch_4')
    batch5 = unpickle('/home/paul/cifar-10-batches-py/data_batch_5')

    # Training set: batches 1-4 (40000 images); batch 5 is held out as the
    # validation set.
    training_tensors = [
        numpy.concatenate((batch1['data'].reshape(10000, 3, 32, 32),
                           batch2['data'].reshape(10000, 3, 32, 32),
                           batch3['data'].reshape(10000, 3, 32, 32),
                           batch4['data'].reshape(10000, 3, 32, 32))),
        numpy.concatenate((batch1['labels'],
                           batch2['labels'],
                           batch3['labels'],
                           batch4['labels']))]
    validation_tensors = [batch5['data'].reshape(10000, 3, 32, 32),
                          numpy.asarray(batch5['labels'])]

    shuffle_dataset = True
    if shuffle_dataset:
        def shuffle_in_unison(a, b):
            '''
            Shuffles two equal-length arrays with the same permutation.
            (Returns shuffled copies; it does not modify its arguments.)
            '''
            assert len(a) == len(b)
            p = numpy.random.permutation(len(a))
            return a[p], b[p]

        training_tensors[0], training_tensors[1] = \
            shuffle_in_unison(training_tensors[0], training_tensors[1])
        validation_tensors[0], validation_tensors[1] = \
            shuffle_in_unison(validation_tensors[0], validation_tensors[1])
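
    # Note: the shuffle helper above draws from numpy's global RNG, so the
    # ordering changes from run to run.  For a reproducible shuffle one could
    # instead use an explicitly seeded generator, e.g. (hypothetical seed):
    #
    #     shuffle_rng = numpy.random.RandomState(1234)
    #     order = shuffle_rng.permutation(len(training_tensors[0]))
    #     training_tensors = [t[order] for t in training_tensors]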

    cifar10_training = Dataset(
        tensors=training_tensors,
        names=('images', 'labels'),
        formats=(DenseFormat(axes=('b', 'c', '0', '1'),
                             shape=(-1, 3, 32, 32),
                             dtype='uint8'),
                 DenseFormat(axes=('b', ),
                             shape=(-1, ),
                             dtype='int64')))

    cifar10_validation = Dataset(
        tensors=validation_tensors,
        names=('images', 'labels'),
        formats=(DenseFormat(axes=('b', 'c', '0', '1'),
                             shape=(-1, 3, 32, 32),
                             dtype='uint8'),
                 DenseFormat(axes=('b', ),
                             shape=(-1, ),
                             dtype='int64')))
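
    # The axis labels appear to follow the pylearn2-style convention:
    # 'b' = batch, 'c' = channel, '0' and '1' = image rows and columns;
    # a shape entry of -1 marks the variable-sized batch axis.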

    cifar10_validation_iterator = cifar10_validation.iterator(
        iterator_type='sequential',
        batch_size=args.batch_size)
    image_uint8_node, label_node = cifar10_validation_iterator.make_input_nodes()
    image_node = CastNode(image_uint8_node, 'floatX')
    image_node_lcn = Lcn(image_node)
    # image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(3447523)
    theano_rng = RandomStreams(2387345)

    (affine_nodes,
     output_node) = build_fc_classifier(image_node_lcn,
                                        sizes,
                                        sparse_init_counts,
                                        args.dropout_include_rates,
                                        rng,
                                        theano_rng)

    loss_node = CrossEntropy(output_node, label_node)
    scalar_loss = loss_node.output_symbol.mean()  # mean cross-entropy over the batch
    max_epochs = 10000

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []
    for affine_node in affine_nodes:
        for params in (affine_node.linear_node.params,
                       affine_node.bias_node.params):
            parameters.append(params)
            gradients = theano.gradient.grad(scalar_loss, params)
            parameter_updater = SgdParameterUpdater(params,
                                                    gradients,
                                                    args.learning_rate,
                                                    args.initial_momentum,
                                                    args.nesterov)
            parameter_updaters.append(parameter_updater)

            momentum_updaters.append(LinearlyInterpolatesOverEpochs(
                parameter_updater.momentum,
                args.final_momentum,
                args.epochs_to_momentum_saturation))
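
    # For reference: SgdParameterUpdater presumably applies the usual momentum
    # update, with the Nesterov variant when args.nesterov is set, and
    # LinearlyInterpolatesOverEpochs then ramps the momentum from
    # args.initial_momentum to args.final_momentum over
    # args.epochs_to_momentum_saturation epochs.  A rough sketch of one SGD
    # step (illustration only, not used by this script):
    #
    #     velocity = momentum * velocity - learning_rate * gradient
    #     if nesterov:
    #         step = momentum * velocity - learning_rate * gradient
    #     else:
    #         step = velocity
    #     params = params + step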

    #
    # Makes batch and epoch callbacks
    #

    def make_output_basename(args):
        assert_equal(os.path.splitext(args.output_prefix)[1], "")
        if os.path.isdir(args.output_prefix) and \
           not args.output_prefix.endswith('/'):
            args.output_prefix += '/'

        output_dir, output_prefix = os.path.split(args.output_prefix)
        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return "{}lr-{}_mom-{}_nesterov-{}_bs-{}".format(
            output_prefix,
            args.learning_rate,
            args.initial_momentum,
            args.nesterov,
            args.batch_size)
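
    # For example, with hypothetical arguments --learning_rate 0.01
    # --initial_momentum 0.5 --nesterov --batch_size 100 and an output prefix
    # of 'results/cifar', this returns something like
    # 'results/cifar_lr-0.01_mom-0.5_nesterov-True_bs-100'.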

    epoch_logger = EpochLogger(make_output_basename(args) + "_log.h5")

    misclassification_node = Misclassification(output_node, label_node)

    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node,
        callbacks=[print_mcr,
                   StopsOnStagnation(max_epochs=10,
                                     min_proportional_decrease=0.0)])
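
    # StopsOnStagnation presumably ends training once the validation
    # misclassification rate has gone 10 consecutive epochs without improving
    # (min_proportional_decrease=0.0 counts any decrease at all as an
    # improvement).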

    epoch_logger.subscribe_to('validation misclassification',
                              validation_misclassification_monitor)

    # batch callback (monitor)
    # training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node, callbacks=[print_loss])
    epoch_logger.subscribe_to('training mean loss', training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                                       callbacks=[])
    epoch_logger.subscribe_to('training misclassification %',
                              training_misclassification_monitor)

    # epoch callbacks
    # validation_loss_logger = LogsToLists()


    def make_output_filename(args, best=False):
        basename = make_output_basename(args)
        return "{}{}.pkl".format(basename, '_best' if best else "")

    model = SerializableModel([image_uint8_node], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(
        loss_node,
        callbacks=[saves_best])

    epoch_logger.subscribe_to('validation loss', validation_loss_monitor)

    validation_callback = ValidationCallback(
        inputs=[image_uint8_node.output_symbol, label_node.output_symbol],
        input_iterator=cifar10_validation_iterator,
        epoch_callbacks=[validation_loss_monitor,
                         validation_misclassification_monitor])

    trainer = Sgd([image_uint8_node, label_node],
                  cifar10_training.iterator(iterator_type='sequential',
                                            batch_size=args.batch_size),
                  callbacks=(parameter_updaters +
                             momentum_updaters +
                             [training_loss_monitor,
                              training_misclassification_monitor,
                              validation_callback,
                              LimitsNumEpochs(max_epochs)]))

    # stuff_to_pickle = OrderedDict(
    #     (('model', model),
    #      ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    # trainer.epoch_callbacks += (momentum_updaters +
    #                             [PicklesOnEpoch(stuff_to_pickle,
    #                                             make_output_filename(args),
    #                                             overwrite=False),
    #                              validation_callback,
    #                              LimitsNumEpochs(max_epochs)])

    trainer.train()
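
# Entry point (not shown in this snippet); assuming the script is meant to be
# run directly, it would typically end with:
#
#     if __name__ == '__main__':
#         main()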