def get_optimized_images(float_image):
    '''
    Returns one optimized image per output label, by running SGD on the input
    image (held in shared_input_float) against each label's loss in turn.
    '''
    optimized_images = input_float_node.output_format.make_batch(
        is_symbolic=False, batch_size=10)

    for i in xrange(model.output_nodes[0].output_format.shape[1]):
        print("optimizing image w.r.t. '%d' label" % i)

        param_updater = SgdParameterUpdater(shared_input_float,
                                            loss_symbol,
                                            learning_rate=args.learning_rate,
                                            momentum=args.momentum,
                                            use_nesterov=args.nesterov)

        sgd = Sgd(input_nodes=[],
                  input_iterator=DummyIterator(),
                  callbacks=[param_updater,
                             LimitsNumEpochs(args.max_iterations)])

        shared_input_float.set_value(float_image)
        shared_label.set_value(numpy.asarray([i], dtype=shared_label.dtype))

        sgd.train()

        optimized_images[i, ...] = shared_input_float.get_value()[0, ...]

    return optimized_images
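# Illustrative usage sketch (not part of the original script): tile the batch
# returned by get_optimized_images() into a single grid image for inspection.
# Assumes the batch has shape (num_images, rows, cols); the helper name
# tile_into_grid is hypothetical.
def tile_into_grid(images, images_per_row=5):
    num_images, rows, cols = images.shape
    num_rows = int(numpy.ceil(num_images / float(images_per_row)))
    grid = numpy.zeros((num_rows * rows, images_per_row * cols),
                       dtype=images.dtype)
    for index in range(num_images):
        r, c = divmod(index, images_per_row)
        grid[r * rows:(r + 1) * rows, c * cols:(c + 1) * cols] = images[index]
    return grid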
def test_mean_over_epoch():
    rng = numpy.random.RandomState(3851)
    vectors = rng.uniform(-1.0, 1.0, size=(12, 10))
    fmt = DenseFormat(axes=('b', 'f'), shape=(-1, 10), dtype=vectors.dtype)
    dataset = Dataset(names=['vectors'], formats=[fmt], tensors=[vectors])

    iterator = dataset.iterator('sequential',
                                batch_size=2,
                                loop_style="divisible")
    input_node = iterator.make_input_nodes()[0]
    l2_norm_node = L2Norm(input_node)

    num_averages_compared = [0]

    def compare_with_expected_average(values, _):  # ignore format argument
        assert_equal(len(values), 1)
        average = values[0]

        assert_is_instance(fmt, DenseFormat)
        l2_norms = numpy.sqrt((vectors ** 2.0).sum(fmt.axes.index('f')))
        expected_average = l2_norms.sum() / l2_norms.size
        assert_allclose(average, expected_average)

        num_averages_compared[0] += 1

    average_monitor = MeanOverEpoch(l2_norm_node,
                                    [compare_with_expected_average])

    class DatasetRandomizer(EpochCallback):
        '''
        Fills the dataset with a fresh set of random values after each epoch.
        '''

        def on_start_training(self):
            pass

        def on_epoch(self):
            vectors[...] = rng.uniform(-1.0, 1.0, size=vectors.shape)

    trainer = Sgd([input_node],
                  iterator,
                  callbacks=[average_monitor,
                             LimitsNumEpochs(3),
                             DatasetRandomizer()])

    trainer.train()

    assert_equal(num_averages_compared[0], 3)
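# Minimal sketch of the contract this test exercises (not MeanOverEpoch's
# actual implementation): accumulate a per-example value over an epoch, then
# pass the mean to each callback when the epoch ends.  Assumes `values` is a
# numpy array of per-example values for one batch, and callbacks take
# (values_list, format) like compare_with_expected_average above.
class EpochMeanSketch(object):
    def __init__(self, callbacks):
        self.callbacks = callbacks
        self._total = 0.0
        self._count = 0

    def on_batch(self, values):
        self._total += values.sum()
        self._count += values.size

    def on_epoch(self):
        mean = self._total / self._count
        self._total, self._count = 0.0, 0
        for callback in self.callbacks:
            callback([mean], None)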
def test_limit_param_norms():
    '''
    A unit test for limit_param_norms().

    Optimizes a simple function f = ||W - x||, with a limit on W's norms.

    W is initialized to 0, and the datum x has L2 norm 0.3, which is bigger
    than W's max norm of 0.2. Therefore, we expect the final value of W to be
    x, rescaled to have norm max_norm.
    '''
    floatX = theano.config.floatX

    def make_single_example_dataset(norm, shape, rng):
        '''
        Returns a Dataset with a single datum with a given L2 norm.

        Parameters
        ----------
        norm: float
          The L2 norm that the flattened datum should have.

        shape: Sequence
          The shape of the datum.

        Returns
        -------
        rval: Dataset
        '''
        axes = ('b', ) + tuple(str(i) for i in range(len(shape)))
        fmt = DenseFormat(axes=axes,
                          shape=(-1, ) + shape,
                          dtype=floatX)
        data = fmt.make_batch(batch_size=1, is_symbolic=False)
        data[...] = rng.uniform(low=-1.0, high=1.0, size=data.shape)

        sum_axes = tuple(range(1, len(shape) + 1))

        # Scale all data so that L2 norms = norm
        norms = numpy.sqrt((data ** 2.0).sum(axis=sum_axes, keepdims=True))
        scales = norm / (norms + .00001)
        data *= scales

        return Dataset(tensors=[data], formats=[fmt], names=['data'])

    def make_costs_node(input_node, weights):
        '''
        Returns a Node that computes the squared distance between input_node
        and weights.
        '''
        assert_is_instance(input_node, Node)

        flat_shape = (input_node.output_symbol.shape[0], -1)
        input_vectors = input_node.output_symbol.reshape(flat_shape)
        weight_vectors = weights.reshape((weights.shape[0], -1))

        diff = input_vectors - weight_vectors
        costs = T.sqr(diff).sum(axis=1)

        return Node([input_node],
                    costs,
                    DenseFormat(axes=['b'], shape=[-1], dtype=weights.dtype))

    dataset_norm = .3
    max_norm = .2
    learning_rate = .001

    rng = numpy.random.RandomState(325)

    def print_cost(monitored_value, fmt):
        print("avg cost: %s" % monitored_value)

    def print_weight_norm(monitored_values, fmt):
        assert_equal(len(monitored_values), 1)
        weights = monitored_values[0]
        norm = numpy.sqrt((weights.get_value() ** 2.0).sum())
        print("weights' norm: %s" % norm)

    for shape in ((2, ), (2, 3, 4)):
        dataset = make_single_example_dataset(dataset_norm, shape, rng)
        weights = theano.shared(numpy.zeros((1, ) + shape, dtype=floatX))

        training_iterator = dataset.iterator(iterator_type='sequential',
                                             batch_size=1)
        input_nodes = training_iterator.make_input_nodes()
        assert_equal(len(input_nodes), 1)

        costs_node = make_costs_node(input_nodes[0], weights)
        gradients = theano.gradient.grad(costs_node.output_symbol.mean(),
                                         weights)

        param_updater = SgdParameterUpdater(parameter=weights,
                                            gradient=gradients,
                                            learning_rate=learning_rate,
                                            momentum=0.0,
                                            use_nesterov=False)

        input_axes = tuple(range(1, len(shape) + 1))
        limit_param_norms(param_updater, weights, max_norm, input_axes)

        stops_on_stagnation = StopsOnStagnation(max_epochs=10)
        average_cost_monitor = MeanOverEpoch(costs_node,
                                             callbacks=[stops_on_stagnation])

        sgd = Sgd(inputs=input_nodes,
                  input_iterator=training_iterator,
                  callbacks=[param_updater, average_cost_monitor])

        sgd.train()

        weight_norm = numpy.sqrt((weights.get_value() ** 2.0).sum())
        assert_almost_equal(weight_norm, max_norm, decimal=6)

        # An optional sanity-check to confirm that the weights are on a
        # straight line between their initial value (0.0) and the data.
        normed_weights = weights.get_value() / weight_norm
        normed_data = dataset.tensors[0] / dataset_norm
        assert_allclose(normed_weights, normed_data, rtol=learning_rate * 10)
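# Minimal numpy sketch of the constraint the test above checks for (not the
# library's symbolic implementation of limit_param_norms): after an update, if
# a parameter's L2 norm over `input_axes` exceeds max_norm, rescale it back
# onto the max_norm sphere, otherwise leave it unchanged.
def clip_param_norms(params, max_norm, input_axes):
    norms = numpy.sqrt((params ** 2.0).sum(axis=tuple(input_axes),
                                           keepdims=True))
    scales = numpy.minimum(1.0, max_norm / numpy.maximum(norms, 1e-7))
    return params * scales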
# Measures the average misclassification rate over some dataset
misclassification_rate_monitor = AverageMonitor(
    misclassification_node.output_symbol,
    misclassification_node.output_format,
    callbacks=[print_misclassification_rate, saves_best, training_stopper])

validation_iter = validation_set.iterator(iterator_type='sequential',
                                          batch_size=100)

# Gets called by trainer between training epochs.
validation_callback = ValidationCallback(
    inputs=[image_node.output_symbol, label_node.output_symbol],
    input_iterator=validation_iter,
    monitors=[misclassification_rate_monitor])

trainer = Sgd([image_node, label_node],
              training_iter,
              param_symbols,
              param_updaters,
              monitors=[],
              # Measure validation misclassification rate; quit if it stops
              # falling. Perform no more than 100 epochs.
              epoch_callbacks=[validation_callback,
                               LimitsNumEpochs(100)])

start_time = time.time()
trainer.train()
elapsed_time = time.time() - start_time

print("The time elapsed for training is", elapsed_time)
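# Illustrative sketch (not the library's ValidationCallback implementation) of
# what such a hook conceptually does between epochs: run one full pass over
# the validation batches and average a per-batch metric.  The compiled
# function name `misclassification_fn` is hypothetical.
def run_validation(misclassification_fn, batches):
    rates = [misclassification_fn(images, labels) for images, labels in batches]
    return sum(rates) / float(len(rates))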
def main():
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/multilayer_perceptron/:
    #   multilayer_perceptron.ipynb
    #   mlp_tutorial_part_3.yaml

    sizes = [500, 500, 10]
    sparse_init_counts = [15, 15]
    assert_equal(len(sparse_init_counts), len(sizes) - 1)

    assert_equal(sizes[-1], 10)

    mnist_training, mnist_testing = load_mnist()

    # split training set into training and validation sets
    tensors = mnist_training.tensors
    training_tensors = [t[:-args.validation_size, ...] for t in tensors]
    validation_tensors = [t[-args.validation_size:, ...] for t in tensors]

    if not args.no_shuffle_dataset:
        def shuffle_in_unison_inplace(a, b):
            assert len(a) == len(b)
            p = numpy.random.permutation(len(a))
            return a[p], b[p]

        [training_tensors[0], training_tensors[1]] = \
            shuffle_in_unison_inplace(training_tensors[0],
                                      training_tensors[1])
        [validation_tensors[0], validation_tensors[1]] = \
            shuffle_in_unison_inplace(validation_tensors[0],
                                      validation_tensors[1])

    all_images_shared = theano.shared(numpy.vstack([training_tensors[0],
                                                    validation_tensors[0]]))
    all_labels_shared = theano.shared(
        numpy.concatenate([training_tensors[1], validation_tensors[1]]))

    length_training = training_tensors[0].shape[0]
    length_validation = validation_tensors[0].shape[0]
    indices_training = numpy.asarray(range(length_training))
    indices_validation = numpy.asarray(range(length_training,
                                             length_training +
                                             length_validation))

    indices_training_dataset = Dataset(
        tensors=[indices_training],
        names=["indices"],
        formats=[DenseFormat(axes=["b"], shape=[-1], dtype="int64")])
    indices_validation_dataset = Dataset(
        tensors=[indices_validation],
        names=["indices"],
        formats=[DenseFormat(axes=["b"], shape=[-1], dtype="int64")])

    indices_training_iterator = indices_training_dataset.iterator(
        iterator_type="sequential", batch_size=args.batch_size)
    indices_validation_iterator = indices_validation_dataset.iterator(
        iterator_type="sequential", batch_size=10000)

    mnist_validation_iterator = indices_validation_iterator
    mnist_training_iterator = indices_training_iterator

    input_indices_symbolic, = indices_training_iterator.make_input_nodes()
    image_lookup_node = ImageLookeupNode(input_indices_symbolic,
                                         all_images_shared)
    label_lookup_node = LabelLookeupNode(input_indices_symbolic,
                                         all_labels_shared)

    image_node = CastNode(image_lookup_node, "floatX")
    # image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(34523)
    theano_rng = RandomStreams(23845)

    (affine_nodes, output_node) = build_fc_classifier(
        image_node,
        sizes,
        sparse_init_counts,
        args.dropout_include_rates,
        rng,
        theano_rng)

    loss_node = CrossEntropy(output_node, label_lookup_node)
    loss_sum = loss_node.output_symbol.mean()
    max_epochs = 200

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []

    for affine_node in affine_nodes:
        for params in (affine_node.linear_node.params,
                       affine_node.bias_node.params):
            parameters.append(params)
            gradients = theano.gradient.grad(loss_sum, params)
            parameter_updater = SgdParameterUpdater(params,
                                                    gradients,
                                                    args.learning_rate,
                                                    args.initial_momentum,
                                                    args.nesterov)
            parameter_updaters.append(parameter_updater)

            momentum_updaters.append(LinearlyInterpolatesOverEpochs(
                parameter_updater.momentum,
                args.final_momentum,
                args.epochs_to_momentum_saturation))

    #
    # Makes batch and epoch callbacks
    #

    """
    def make_output_basename(args):
        assert_equal(os.path.splitext(args.output_prefix)[1], "")
        if os.path.isdir(args.output_prefix) and \
           not args.output_prefix.endswith('/'):
            args.output_prefix += '/'

        output_dir, output_prefix = os.path.split(args.output_prefix)
        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return "{}lr-{}_mom-{}_nesterov-{}_bs-{}".format(
            output_prefix,
            args.learning_rate,
            args.initial_momentum,
            args.nesterov,
            args.batch_size)
    """

    assert_equal(os.path.splitext(args.output_prefix)[1], "")
    if os.path.isdir(args.output_prefix) and \
       not args.output_prefix.endswith("/"):
        args.output_prefix += "/"

    output_dir, output_prefix = os.path.split(args.output_prefix)
    if output_prefix != "":
        output_prefix = output_prefix + "_"

    output_prefix = os.path.join(output_dir, output_prefix)

    epoch_logger = EpochLogger(output_prefix + "SGD_nesterov.h5")

    # misclassification_node = Misclassification(output_node, label_node)
    # mcr_logger = LogsToLists()
    # training_stopper = StopsOnStagnation(max_epochs=10,
    #                                      min_proportional_decrease=0.0)

    misclassification_node = Misclassification(output_node, label_lookup_node)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to("validation mean loss", validation_loss_monitor)

    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node,
        callbacks=[print_mcr,
                   StopsOnStagnation(max_epochs=20,
                                     min_proportional_decrease=0.0)])

    epoch_logger.subscribe_to("validation misclassification",
                              validation_misclassification_monitor)

    # batch callback (monitor)
    # training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node, callbacks=[print_loss])
    epoch_logger.subscribe_to("training mean loss", training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                                       callbacks=[])
    epoch_logger.subscribe_to("training misclassification %",
                              training_misclassification_monitor)

    # epoch callbacks
    # validation_loss_logger = LogsToLists()

    def make_output_filename(args, best=False):
        # NOTE: make_output_basename is commented out above, so this helper is
        # only usable if that definition is restored.
        basename = make_output_basename(args)
        return "{}{}.pkl".format(basename, "_best" if best else "")

    # model = SerializableModel([input_indices_symbolic], [output_node])
    # saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to("validation loss", validation_loss_monitor)

    epoch_timer = EpochTimer2()
    epoch_logger.subscribe_to("epoch duration", epoch_timer)

    validation_callback = ValidationCallback(
        inputs=[input_indices_symbolic.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor,
                         validation_misclassification_monitor])

    trainer = Sgd([input_indices_symbolic],
                  mnist_training_iterator,
                  callbacks=(parameter_updaters +
                             momentum_updaters +
                             [  # training_loss_monitor,
                                # training_misclassification_monitor,
                              validation_callback,
                              LimitsNumEpochs(max_epochs),
                              epoch_timer]))
    #                         validation_loss_monitor]))

    # stuff_to_pickle = OrderedDict(
    #     (('model', model),
    #      ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    # trainer.epoch_callbacks += (momentum_updaters +
    #                             [PicklesOnEpoch(stuff_to_pickle,
    #                                             make_output_filename(args),
    #                                             overwrite=False),
    #                              validation_callback,
    #                              LimitsNumEpochs(max_epochs)])

    start_time = time.time()
    trainer.train()
    elapsed_time = time.time() - start_time

    print("Total elapsed time for training is:", elapsed_time)
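# Minimal numpy sketch of the update rule the SgdParameterUpdater objects
# above are configured with (illustrative only; the library's exact Nesterov
# formulation may differ).  Classic momentum: v <- mu*v - lr*grad, w <- w + v.
# The Nesterov branch uses the common Sutskever-style variant.
def sgd_momentum_step(w, v, grad, learning_rate, momentum, nesterov=False):
    v = momentum * v - learning_rate * grad
    if nesterov:
        w = w + momentum * v - learning_rate * grad
    else:
        w = w + v
    return w, v

# Example: a few steps toward the minimum of f(w) = 0.5 * w**2 (grad = w).
# w, v = 1.0, 0.0
# for _ in range(10):
#     w, v = sgd_momentum_step(w, v, grad=w, learning_rate=0.1, momentum=0.9)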
def main():
    '''
    Entry point of this script.
    '''
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/convolutional_network/:
    #   convolutional_network.ipynb

    filter_counts = [96, 192, 192]
    filter_init_uniform_ranges = [0.005] * len(filter_counts)
    filter_shapes = [(8, 8), (8, 8), (5, 5)]
    pool_shapes = [(4, 4), (4, 4), (2, 2)]
    pool_strides = [(2, 2), (2, 2), (2, 2)]
    pool_pads = [(2, 2), (2, 2), (2, 2)]
    affine_output_sizes = [10]
    affine_init_stddevs = [.05] * len(affine_output_sizes)
    dropout_include_rates = [0.8, 0.5, 0.5, 0.5]
    # dropout_include_rates = ([.8 if args.dropout else 1.0] *
    #                          (len(filter_counts) + len(affine_output_sizes)))
    conv_pads = [(4, 4), (3, 3), (3, 3)]

    assert_equal(affine_output_sizes[-1], 10)

    def unpickle(file):
        import cPickle
        with open(file, 'rb') as fo:
            return cPickle.load(fo)

    batch1 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_1')
    batch2 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_2')
    batch3 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_3')
    batch4 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_4')
    batch5 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_5')

    training_tensors = [
        numpy.concatenate((batch1['data'].reshape(10000, 3, 32, 32),
                           batch2['data'].reshape(10000, 3, 32, 32),
                           batch3['data'].reshape(10000, 3, 32, 32),
                           batch4['data'].reshape(10000, 3, 32, 32))),
        numpy.concatenate((batch1['labels'],
                           batch2['labels'],
                           batch3['labels'],
                           batch4['labels']))]

    validation_tensors = [
        batch5['data'].reshape(10000, 3, 32, 32),
        numpy.asarray(batch5['labels'])]

    if not args.no_shuffle_dataset:
        def shuffle_in_unison_inplace(a, b):
            assert len(a) == len(b)
            p = numpy.random.permutation(len(a))
            return a[p], b[p]

        [training_tensors[0], training_tensors[1]] = \
            shuffle_in_unison_inplace(training_tensors[0],
                                      training_tensors[1])
        [validation_tensors[0], validation_tensors[1]] = \
            shuffle_in_unison_inplace(validation_tensors[0],
                                      validation_tensors[1])

    all_images_shared = theano.shared(numpy.vstack([training_tensors[0],
                                                    validation_tensors[0]]))
    all_labels_shared = theano.shared(
        numpy.concatenate([training_tensors[1], validation_tensors[1]]))

    length_training = training_tensors[0].shape[0]
    length_validation = validation_tensors[0].shape[0]
    indices_training = numpy.asarray(range(length_training))
    indices_validation = numpy.asarray(range(length_training,
                                             length_training +
                                             length_validation))

    indices_training_dataset = Dataset(
        tensors=[indices_training],
        names=['indices'],
        formats=[DenseFormat(axes=['b'], shape=[-1], dtype='int64')])
    indices_validation_dataset = Dataset(
        tensors=[indices_validation],
        names=['indices'],
        formats=[DenseFormat(axes=['b'], shape=[-1], dtype='int64')])

    indices_training_iterator = indices_training_dataset.iterator(
        iterator_type='sequential', batch_size=args.batch_size)
    indices_validation_iterator = indices_validation_dataset.iterator(
        iterator_type='sequential', batch_size=args.batch_size)

    mnist_validation_iterator = indices_validation_iterator
    mnist_training_iterator = indices_training_iterator

    input_indices_symbolic, = indices_training_iterator.make_input_nodes()
    image_lookup_node = ImageLookeupNode(input_indices_symbolic,
                                         all_images_shared)
    label_lookup_node = LabelLookeupNode(input_indices_symbolic,
                                         all_labels_shared)

    image_node = RescaleImage(image_lookup_node)
    image_node = Lcn(image_node)

    rng = numpy.random.RandomState(129734)
    theano_rng = RandomStreams(2387845)

    (conv_layers, affine_layers, output_node) = build_conv_classifier(
        image_node,
        filter_shapes,
        filter_counts,
        filter_init_uniform_ranges,
        pool_shapes,
        pool_strides,
        affine_output_sizes,
        affine_init_stddevs,
        dropout_include_rates,
        conv_pads,
        rng,
        theano_rng)

    loss_node = CrossEntropy(output_node, label_lookup_node)
    scalar_loss = loss_node.output_symbol.mean()

    if args.weight_decay != 0.0:
        for conv_layer in conv_layers:
            filters = conv_layer.conv2d_node.filters
            filter_loss = args.weight_decay * theano.tensor.sqr(filters).sum()
            scalar_loss = scalar_loss + filter_loss

        for affine_layer in affine_layers:
            weights = affine_layer.affine_node.linear_node.params
            weight_loss = args.weight_decay * theano.tensor.sqr(weights).sum()
            scalar_loss = scalar_loss + weight_loss

    max_epochs = 201

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []

    def add_updaters(parameter,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters):
        '''
        Adds a ParameterUpdater to parameter_updaters, and a
        LinearlyInterpolatesOverEpochs to momentum_updaters.
        '''
        gradient = theano.gradient.grad(scalar_loss, parameter)
        parameter_updaters.append(SgdParameterUpdater(parameter,
                                                      gradient,
                                                      args.learning_rate,
                                                      args.initial_momentum,
                                                      args.nesterov))
        momentum_updaters.append(LinearlyInterpolatesOverEpochs(
            parameter_updaters[-1].momentum,
            args.final_momentum,
            args.epochs_to_momentum_saturation))

    for conv_layer in conv_layers:
        filters = conv_layer.conv2d_node.filters
        parameters.append(filters)
        add_updaters(filters,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

        if args.max_filter_norm != numpy.inf:
            limit_param_norms(parameter_updaters[-1],
                              filters,
                              args.max_filter_norm,
                              (1, 2, 3))

        bias = conv_layer.bias_node.params
        parameters.append(bias)
        add_updaters(bias,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

    for affine_layer in affine_layers:
        weights = affine_layer.affine_node.linear_node.params
        parameters.append(weights)
        add_updaters(weights,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

        if args.max_col_norm != numpy.inf:
            limit_param_norms(parameter_updater=parameter_updaters[-1],
                              param=weights,
                              max_norm=args.max_col_norm,
                              input_axes=[0])

        biases = affine_layer.affine_node.bias_node.params
        parameters.append(biases)
        add_updaters(biases,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

    #
    # Makes batch and epoch callbacks
    #

    def make_output_filename(args, best=False):
        '''
        Constructs a filename that reflects the command-line params.
        '''
        assert_equal(os.path.splitext(args.output_prefix)[1], "")

        if os.path.isdir(args.output_prefix):
            output_dir, output_prefix = args.output_prefix, ""
        else:
            output_dir, output_prefix = os.path.split(args.output_prefix)

        assert_true(os.path.isdir(output_dir))

        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return ("%slr-%g_mom-%g_nesterov-%s_bs-%d%s.pkl" %
                (output_prefix,
                 args.learning_rate,
                 args.initial_momentum,
                 args.nesterov,
                 args.batch_size,
                 "_best" if best else ""))

    # Set up the loggers
    epoch_logger = EpochLogger(make_output_filename(args) + "_log.h5")

    misclassification_node = Misclassification(output_node, label_lookup_node)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to('validation mean loss', validation_loss_monitor)

    training_stopper = StopsOnStagnation(max_epochs=201,
                                         min_proportional_decrease=0.0)
    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node,
        callbacks=[print_misclassification_rate, training_stopper])

    epoch_logger.subscribe_to('validation misclassification',
                              validation_misclassification_monitor)

    # batch callback (monitor)
    # training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node, callbacks=[print_loss])
    epoch_logger.subscribe_to('training loss', training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                                       callbacks=[])
    epoch_logger.subscribe_to('training misclassification %',
                              training_misclassification_monitor)

    epoch_timer = EpochTimer2()
    epoch_logger.subscribe_to('epoch duration', epoch_timer)
    # epoch_logger.subscribe_to('epoch time',
    #                           epoch_timer)

    #################

    model = SerializableModel([input_indices_symbolic], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node,
                                            callbacks=[saves_best])
    epoch_logger.subscribe_to("Validation Loss", validation_loss_monitor)

    validation_callback = ValidationCallback(
        inputs=[input_indices_symbolic.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor,
                         validation_misclassification_monitor])

    loss_function = theano.function([input_indices_symbolic.output_symbol],
                                    scalar_loss)
    cost_args = mnist_validation_iterator.next()
    print(loss_function(*cost_args))

    # trainer = Sgd((image_node.output_symbol, label_node.output_symbol),
    trainer = Sgd([input_indices_symbolic],
                  mnist_training_iterator,
                  callbacks=(parameter_updaters +
                             [training_loss_monitor,
                              training_misclassification_monitor,
                              validation_callback]))

    '''
    stuff_to_pickle = OrderedDict(
        (('model', model),
         ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    trainer.epoch_callbacks += (momentum_updaters +
                                [EpochTimer(),
                                 PicklesOnEpoch(stuff_to_pickle,
                                                make_output_filename(args),
                                                overwrite=False),
                                 validation_callback,
                                 LimitsNumEpochs(max_epochs)])
    '''

    trainer.epoch_callbacks += (momentum_updaters +
                                [LimitsNumEpochs(max_epochs),
                                 epoch_timer])

    start_time = time.time()
    trainer.train()
    elapsed_time = time.time() - start_time

    print("Total elapsed time for training is:", elapsed_time)
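# Sketch of a Python-2/3-compatible variant of the unpickle() helper used
# above, assuming the standard CIFAR-10 "python version" batch files.  Under
# Python 3 the batches need encoding='bytes' (and the dict keys come back as
# bytes rather than str); Python 2's pickle.load has no encoding argument.
import pickle

def unpickle_compat(path):
    with open(path, 'rb') as f:
        try:
            return pickle.load(f, encoding='bytes')
        except TypeError:  # running under Python 2
            return pickle.load(f)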
def main():
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/multilayer_perceptron/:
    #   multilayer_perceptron.ipynb
    #   mlp_tutorial_part_3.yaml

    sizes = [500, 500, 10]
    sparse_init_counts = [15, 15]
    assert_equal(len(sparse_init_counts), len(sizes) - 1)

    assert_equal(sizes[-1], 10)

    mnist_training, mnist_testing = load_mnist()

    if args.validation_size == 0:
        # use testing set as validation set
        mnist_validation = mnist_testing
    else:
        # split training set into training and validation sets
        tensors = mnist_training.tensors
        training_tensors = [t[:-args.validation_size, ...] for t in tensors]
        validation_tensors = [t[-args.validation_size:, ...] for t in tensors]
        mnist_training = Dataset(tensors=training_tensors,
                                 names=mnist_training.names,
                                 formats=mnist_training.formats)
        mnist_validation = Dataset(tensors=validation_tensors,
                                   names=mnist_training.names,
                                   formats=mnist_training.formats)

    mnist_validation_iterator = mnist_validation.iterator(
        iterator_type='sequential',
        batch_size=args.batch_size)
    image_uint8_node, label_node = mnist_validation_iterator.make_input_nodes()
    image_node = CastNode(image_uint8_node, 'floatX')
    # image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(34523)
    theano_rng = RandomStreams(23845)

    (affine_nodes, output_node) = build_fc_classifier(
        image_node,
        sizes,
        sparse_init_counts,
        args.dropout_include_rates,
        rng,
        theano_rng)

    loss_node = CrossEntropy(output_node, label_node)
    loss_sum = loss_node.output_symbol.mean()
    max_epochs = 10000

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []
    for affine_node in affine_nodes:
        for params in (affine_node.linear_node.params,
                       affine_node.bias_node.params):
            parameters.append(params)
            gradients = theano.gradient.grad(loss_sum, params)
            parameter_updater = SgdParameterUpdater(params,
                                                    gradients,
                                                    args.learning_rate,
                                                    args.initial_momentum,
                                                    args.nesterov)
            parameter_updaters.append(parameter_updater)

            momentum_updaters.append(LinearlyInterpolatesOverEpochs(
                parameter_updater.momentum,
                args.final_momentum,
                args.epochs_to_momentum_saturation))

    #
    # Makes batch and epoch callbacks
    #

    def make_output_basename(args):
        assert_equal(os.path.splitext(args.output_prefix)[1], "")
        if os.path.isdir(args.output_prefix) and \
           not args.output_prefix.endswith('/'):
            args.output_prefix += '/'

        output_dir, output_prefix = os.path.split(args.output_prefix)
        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return "{}lr-{}_mom-{}_nesterov-{}_bs-{}".format(
            output_prefix,
            args.learning_rate,
            args.initial_momentum,
            args.nesterov,
            args.batch_size)

    epoch_logger = EpochLogger(make_output_basename(args) + "_log.h5")

    # misclassification_node = Misclassification(output_node, label_node)
    # mcr_logger = LogsToLists()
    # training_stopper = StopsOnStagnation(max_epochs=10,
    #                                      min_proportional_decrease=0.0)

    misclassification_node = Misclassification(output_node, label_node)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to('validation mean loss', validation_loss_monitor)

    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node,
        callbacks=[print_mcr,
                   StopsOnStagnation(max_epochs=10,
                                     min_proportional_decrease=0.0)])

    epoch_logger.subscribe_to('validation misclassification',
                              validation_misclassification_monitor)

    # batch callback (monitor)
    # training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node, callbacks=[print_loss])
    epoch_logger.subscribe_to('training mean loss', training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                                       callbacks=[])
    epoch_logger.subscribe_to('training misclassification %',
                              training_misclassification_monitor)

    # epoch callbacks
    # validation_loss_logger = LogsToLists()

    def make_output_filename(args, best=False):
        basename = make_output_basename(args)
        return "{}{}.pkl".format(basename, '_best' if best else "")

    model = SerializableModel([image_uint8_node], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node,
                                            callbacks=[saves_best])
    epoch_logger.subscribe_to('validation loss', validation_loss_monitor)

    validation_callback = ValidationCallback(
        inputs=[image_uint8_node.output_symbol, label_node.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor,
                         validation_misclassification_monitor])

    trainer = Sgd([image_uint8_node, label_node],
                  mnist_training.iterator(iterator_type='sequential',
                                          batch_size=args.batch_size),
                  callbacks=(parameter_updaters +
                             momentum_updaters +
                             [training_loss_monitor,
                              training_misclassification_monitor,
                              validation_callback,
                              LimitsNumEpochs(max_epochs)]))
    #                         validation_loss_monitor]))

    # stuff_to_pickle = OrderedDict(
    #     (('model', model),
    #      ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    # trainer.epoch_callbacks += (momentum_updaters +
    #                             [PicklesOnEpoch(stuff_to_pickle,
    #                                             make_output_filename(args),
    #                                             overwrite=False),
    #                              validation_callback,
    #                              LimitsNumEpochs(max_epochs)])

    trainer.train()
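# Illustrative numpy sketch of sparse initialization in the spirit of
# sparse_init_counts above: each output unit receives `count` nonzero Gaussian
# weights among its inputs, the rest are zero.  This is not necessarily how
# build_fc_classifier initializes its affine nodes.
def sparse_initialize(num_inputs, num_outputs, count, rng, stddev=1.0):
    weights = numpy.zeros((num_inputs, num_outputs))
    for j in range(num_outputs):
        rows = rng.choice(num_inputs, size=count, replace=False)
        weights[rows, j] = rng.standard_normal(count) * stddev
    return weights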
def main():
    '''
    Entry point of this script.
    '''
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/convolutional_network/:
    #   convolutional_network.ipynb

    filter_counts = [96, 192, 192]
    filter_init_uniform_ranges = [0.005] * len(filter_counts)
    filter_shapes = [(8, 8), (8, 8), (5, 5)]
    pool_shapes = [(4, 4), (4, 4), (2, 2)]
    pool_strides = [(2, 2), (2, 2), (2, 2)]
    pool_pads = [(2, 2), (2, 2), (2, 2)]
    affine_output_sizes = [10]
    affine_init_stddevs = [.005] * len(affine_output_sizes)
    dropout_include_rates = [0.8, 0.5, 0.5, 0.5]
    # dropout_include_rates = ([.8 if args.dropout else 1.0] *
    #                          (len(filter_counts) + len(affine_output_sizes)))
    conv_pads = [(4, 4), (3, 3), (3, 3)]

    assert_equal(affine_output_sizes[-1], 10)

    def unpickle(file):
        import cPickle
        with open(file, 'rb') as fo:
            return cPickle.load(fo)

    batch1 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_1')
    batch2 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_2')
    batch3 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_3')
    batch4 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_4')
    batch5 = unpickle('/home/s1422538/datasets/simplelearn/cifar10/original_files/cifar-10-batches-py/data_batch_5')

    training_tensors = [
        numpy.concatenate((batch1['data'].reshape(10000, 3, 32, 32),
                           batch2['data'].reshape(10000, 3, 32, 32),
                           batch3['data'].reshape(10000, 3, 32, 32),
                           batch4['data'].reshape(10000, 3, 32, 32))),
        numpy.concatenate((batch1['labels'],
                           batch2['labels'],
                           batch3['labels'],
                           batch4['labels']))]

    validation_tensors = [
        batch5['data'].reshape(10000, 3, 32, 32),
        numpy.asarray(batch5['labels'])]

    shuffle_dataset = True
    if shuffle_dataset:
        def shuffle_in_unison_inplace(a, b):
            assert len(a) == len(b)
            p = numpy.random.permutation(len(a))
            return a[p], b[p]

        [training_tensors[0], training_tensors[1]] = \
            shuffle_in_unison_inplace(training_tensors[0],
                                      training_tensors[1])
        [validation_tensors[0], validation_tensors[1]] = \
            shuffle_in_unison_inplace(validation_tensors[0],
                                      validation_tensors[1])

    cifar10_training = Dataset(
        tensors=training_tensors,
        names=('images', 'labels'),
        formats=(DenseFormat(axes=('b', 'c', '0', '1'),
                             shape=(-1, 3, 32, 32),
                             dtype='uint8'),
                 DenseFormat(axes=('b',),
                             shape=(-1,),
                             dtype='int64')))

    cifar10_validation = Dataset(
        tensors=validation_tensors,
        names=('images', 'labels'),
        formats=(DenseFormat(axes=('b', 'c', '0', '1'),
                             shape=(-1, 3, 32, 32),
                             dtype='uint8'),
                 DenseFormat(axes=('b',),
                             shape=(-1,),
                             dtype='int64')))

    cifar10_validation_iterator = cifar10_validation.iterator(
        iterator_type='sequential',
        loop_style='divisible',
        batch_size=args.batch_size)

    image_uint8_node, label_node = \
        cifar10_validation_iterator.make_input_nodes()
    image_node = RescaleImage(image_uint8_node)
    image_node_lcn = Lcn(image_node)
    # image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(3447523)
    theano_rng = RandomStreams(2387345)

    (conv_layers, affine_layers, output_node) = build_conv_classifier(
        image_node_lcn,
        filter_shapes,
        filter_counts,
        filter_init_uniform_ranges,
        pool_shapes,
        pool_strides,
        pool_pads,
        affine_output_sizes,
        affine_init_stddevs,
        dropout_include_rates,
        conv_pads,
        rng,
        theano_rng)

    loss_node = CrossEntropy(output_node, label_node)
    scalar_loss = loss_node.output_symbol.mean()

    if args.weight_decay != 0.0:
        for conv_layer in conv_layers:
            filters = conv_layer.conv2d_node.filters
            filter_loss = args.weight_decay * theano.tensor.sqr(filters).sum()
            scalar_loss = scalar_loss + filter_loss

        for affine_layer in affine_layers:
            weights = affine_layer.affine_node.linear_node.params
            weight_loss = args.weight_decay * theano.tensor.sqr(weights).sum()
            scalar_loss = scalar_loss + weight_loss

    max_epochs = 500

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []

    def add_updaters(parameter,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters):
        '''
        Adds a ParameterUpdater to parameter_updaters, and a
        LinearlyInterpolatesOverEpochs to momentum_updaters.
        '''
        gradient = theano.gradient.grad(scalar_loss, parameter)
        parameter_updaters.append(SgdParameterUpdater(parameter,
                                                      gradient,
                                                      args.learning_rate,
                                                      args.initial_momentum,
                                                      not args.no_nesterov))
        momentum_updaters.append(LinearlyInterpolatesOverEpochs(
            parameter_updaters[-1].momentum,
            args.final_momentum,
            args.epochs_to_momentum_saturation))

    for conv_layer in conv_layers:
        filters = conv_layer.conv2d_node.filters
        parameters.append(filters)
        add_updaters(filters,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

        if args.max_filter_norm != numpy.inf:
            limit_param_norms(parameter_updaters[-1],
                              filters,
                              args.max_filter_norm,
                              (1, 2, 3))

        bias = conv_layer.bias_node.params
        parameters.append(bias)
        add_updaters(bias,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

    for affine_layer in affine_layers:
        weights = affine_layer.affine_node.linear_node.params
        parameters.append(weights)
        add_updaters(weights,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

        if args.max_col_norm != numpy.inf:
            limit_param_norms(parameter_updater=parameter_updaters[-1],
                              param=weights,
                              max_norm=args.max_col_norm,
                              input_axes=[0])

        biases = affine_layer.affine_node.bias_node.params
        parameters.append(biases)
        add_updaters(biases,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

    #
    # Makes batch and epoch callbacks
    #

    def make_misclassification_monitor():
        '''
        Returns a MeanOverEpoch of the misclassification rate.
        '''
        misclassification_node = Misclassification(output_node, label_node)
        mcr_logger = LogsToLists()
        training_stopper = StopsOnStagnation(max_epochs=10,
                                             min_proportional_decrease=0.0)
        return MeanOverEpoch(misclassification_node,
                             callbacks=[print_misclassification_rate,
                                        mcr_logger,
                                        training_stopper])

    mcr_monitor = make_misclassification_monitor()

    # batch callback (monitor)
    training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node,
                                          callbacks=[print_loss,
                                                     training_loss_logger])

    # epoch callbacks
    validation_loss_logger = LogsToLists()

    def make_output_filename(args, best=False):
        '''
        Constructs a filename that reflects the command-line params.
        '''
        assert_equal(os.path.splitext(args.output_prefix)[1], "")

        if os.path.isdir(args.output_prefix):
            output_dir, output_prefix = args.output_prefix, ""
        else:
            output_dir, output_prefix = os.path.split(args.output_prefix)

        assert_true(os.path.isdir(output_dir))

        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return ("%slr-%g_mom-%g_nesterov-%s_bs-%d%s.pkl" %
                (output_prefix,
                 args.learning_rate,
                 args.initial_momentum,
                 not args.no_nesterov,
                 args.batch_size,
                 "_best" if best else ""))

    model = SerializableModel([image_uint8_node], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node,
                                            callbacks=[validation_loss_logger,
                                                       saves_best])

    validation_callback = ValidationCallback(
        inputs=[image_uint8_node.output_symbol, label_node.output_symbol],
        input_iterator=cifar10_validation_iterator,
        epoch_callbacks=[validation_loss_monitor, mcr_monitor])

    # trainer = Sgd((image_node.output_symbol, label_node.output_symbol),
    trainer = Sgd([image_uint8_node, label_node],
                  cifar10_training.iterator(iterator_type='sequential',
                                            loop_style='divisible',
                                            batch_size=args.batch_size),
                  callbacks=(parameter_updaters + [training_loss_monitor]))

    stuff_to_pickle = OrderedDict(
        (('model', model),
         ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    trainer.epoch_callbacks += (momentum_updaters +
                                [PicklesOnEpoch(stuff_to_pickle,
                                                make_output_filename(args),
                                                overwrite=False),
                                 validation_callback,
                                 LimitsNumEpochs(max_epochs),
                                 EpochTimer()])

    trainer.train()
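# Sketch of the momentum schedule that LinearlyInterpolatesOverEpochs is
# configured to follow above (conceptually; not the library's implementation):
# ramp linearly from the initial to the final momentum over
# epochs_to_momentum_saturation epochs, then hold the final value.
def interpolated_momentum(epoch, initial, final, saturation_epochs):
    frac = min(float(epoch) / saturation_epochs, 1.0)
    return initial + frac * (final - initial)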
def main():
    '''
    Entry point of this script.
    '''
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/convolutional_network/:
    #   convolutional_network.ipynb

    filter_counts = [64, 64]
    filter_init_uniform_ranges = [.05] * len(filter_counts)
    filter_shapes = [(5, 5), (5, 5)]
    pool_shapes = [(4, 4), (4, 4)]
    pool_strides = [(2, 2), (2, 2)]
    affine_output_sizes = [10]
    affine_init_stddevs = [.05] * len(affine_output_sizes)
    dropout_include_rates = ([.5 if args.dropout else 1.0] *
                             (len(filter_counts) + len(affine_output_sizes)))

    assert_equal(affine_output_sizes[-1], 10)

    mnist_training, mnist_testing = load_mnist()

    if args.validation_size == 0:
        # use testing set as validation set
        mnist_validation = mnist_testing
    else:
        # split training set into training and validation sets
        tensors = mnist_training.tensors
        training_tensors = [t[:-args.validation_size, ...] for t in tensors]
        validation_tensors = [t[-args.validation_size:, ...] for t in tensors]
        mnist_training = Dataset(tensors=training_tensors,
                                 names=mnist_training.names,
                                 formats=mnist_training.formats)
        mnist_validation = Dataset(tensors=validation_tensors,
                                   names=mnist_training.names,
                                   formats=mnist_training.formats)

    mnist_validation_iterator = mnist_validation.iterator(
        iterator_type='sequential',
        loop_style='divisible',
        batch_size=args.batch_size)
    image_uint8_node, label_node = mnist_validation_iterator.make_input_nodes()
    image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(1234)
    theano_rng = RandomStreams(23845)

    (conv_layers, affine_layers, output_node) = build_conv_classifier(
        image_node,
        filter_shapes,
        filter_counts,
        filter_init_uniform_ranges,
        pool_shapes,
        pool_strides,
        affine_output_sizes,
        affine_init_stddevs,
        dropout_include_rates,
        rng,
        theano_rng)

    loss_node = CrossEntropy(output_node, label_node)
    scalar_loss = loss_node.output_symbol.mean()

    if args.weight_decay != 0.0:
        for conv_layer in conv_layers:
            filters = conv_layer.conv2d_node.filters
            filter_loss = args.weight_decay * theano.tensor.sqr(filters).sum()
            scalar_loss = scalar_loss + filter_loss

        for affine_layer in affine_layers:
            weights = affine_layer.affine_node.linear_node.params
            weight_loss = args.weight_decay * theano.tensor.sqr(weights).sum()
            scalar_loss = scalar_loss + weight_loss

    max_epochs = 500

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []

    def add_updaters(parameter,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters):
        '''
        Adds a ParameterUpdater to parameter_updaters, and a
        LinearlyInterpolatesOverEpochs to momentum_updaters.
        '''
        gradient = theano.gradient.grad(scalar_loss, parameter)
        parameter_updaters.append(SgdParameterUpdater(parameter,
                                                      gradient,
                                                      args.learning_rate,
                                                      args.initial_momentum,
                                                      not args.no_nesterov))
        momentum_updaters.append(LinearlyInterpolatesOverEpochs(
            parameter_updaters[-1].momentum,
            args.final_momentum,
            args.epochs_to_momentum_saturation))

    for conv_layer in conv_layers:
        filters = conv_layer.conv2d_node.filters
        parameters.append(filters)
        add_updaters(filters,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

        if args.max_filter_norm != numpy.inf:
            limit_param_norms(parameter_updaters[-1],
                              filters,
                              args.max_filter_norm,
                              (1, 2, 3))

        bias = conv_layer.bias_node.params
        parameters.append(bias)
        add_updaters(bias,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

    for affine_layer in affine_layers:
        weights = affine_layer.affine_node.linear_node.params
        parameters.append(weights)
        add_updaters(weights,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

        if args.max_col_norm != numpy.inf:
            limit_param_norms(parameter_updater=parameter_updaters[-1],
                              param=weights,
                              max_norm=args.max_col_norm,
                              input_axes=[0])

        biases = affine_layer.affine_node.bias_node.params
        parameters.append(biases)
        add_updaters(biases,
                     scalar_loss,
                     parameter_updaters,
                     momentum_updaters)

    #
    # Makes batch and epoch callbacks
    #

    def make_misclassification_monitor():
        '''
        Returns a MeanOverEpoch of the misclassification rate.
        '''
        misclassification_node = Misclassification(output_node, label_node)
        mcr_logger = LogsToLists()
        training_stopper = StopsOnStagnation(max_epochs=10,
                                             min_proportional_decrease=0.0)
        return MeanOverEpoch(misclassification_node,
                             callbacks=[print_misclassification_rate,
                                        mcr_logger,
                                        training_stopper])

    mcr_monitor = make_misclassification_monitor()

    # batch callback (monitor)
    training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node,
                                          callbacks=[print_loss,
                                                     training_loss_logger])

    # epoch callbacks
    validation_loss_logger = LogsToLists()

    def make_output_filename(args, best=False):
        '''
        Constructs a filename that reflects the command-line params.
        '''
        assert_equal(os.path.splitext(args.output_prefix)[1], "")

        if os.path.isdir(args.output_prefix):
            output_dir, output_prefix = args.output_prefix, ""
        else:
            output_dir, output_prefix = os.path.split(args.output_prefix)

        assert_true(os.path.isdir(output_dir))

        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return ("%slr-%g_mom-%g_nesterov-%s_bs-%d%s.pkl" %
                (output_prefix,
                 args.learning_rate,
                 args.initial_momentum,
                 not args.no_nesterov,
                 args.batch_size,
                 "_best" if best else ""))

    model = SerializableModel([image_uint8_node], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node,
                                            callbacks=[validation_loss_logger,
                                                       saves_best])

    validation_callback = ValidationCallback(
        inputs=[image_uint8_node.output_symbol, label_node.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor, mcr_monitor])

    # trainer = Sgd((image_node.output_symbol, label_node.output_symbol),
    trainer = Sgd([image_uint8_node, label_node],
                  mnist_training.iterator(iterator_type='sequential',
                                          loop_style='divisible',
                                          batch_size=args.batch_size),
                  callbacks=(parameter_updaters + [training_loss_monitor]))

    stuff_to_pickle = OrderedDict(
        (('model', model),
         ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    trainer.epoch_callbacks += (momentum_updaters +
                                [EpochTimer(),
                                 PicklesOnEpoch(stuff_to_pickle,
                                                make_output_filename(args),
                                                overwrite=False),
                                 validation_callback,
                                 LimitsNumEpochs(max_epochs)])

    trainer.train()
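# Small numpy sketch of what the weight-decay terms above contribute to each
# parameter's gradient (illustrative only; Theano derives this automatically
# through theano.gradient.grad): d/dW [weight_decay * sum(W**2)] yields
# 2 * weight_decay * W, added to the data-loss gradient.
def gradient_with_weight_decay(data_gradient, weights, weight_decay):
    return data_gradient + 2.0 * weight_decay * weights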
def main():
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/multilayer_perceptron/:
    #   multilayer_perceptron.ipynb
    #   mlp_tutorial_part_3.yaml

    sizes = [500, 500, 10]
    sparse_init_counts = [15, 15]
    assert_equal(len(sparse_init_counts), len(sizes) - 1)

    assert_equal(sizes[-1], 10)

    '''
    mnist_training, mnist_testing = load_mnist()

    if args.validation_size == 0:
        # use testing set as validation set
        mnist_validation = mnist_testing
    else:
        # split training set into training and validation sets
        tensors = mnist_training.tensors
        size_tensors = tensors[0].shape[0]
        training_tensors = [t[:-args.validation_size, ...] for t in tensors]
        validation_tensors = [t[size_tensors - args.validation_size:, ...]
                              for t in tensors]

    shuffle_dataset = True
    if shuffle_dataset == True:
        def shuffle_in_unison_inplace(a, b):
            assert len(a) == len(b)
            p = numpy.random.permutation(len(a))
            return a[p], b[p]

        [training_tensors[0], training_tensors[1]] = \
            shuffle_in_unison_inplace(training_tensors[0],
                                      training_tensors[1])
        [validation_tensors[0], validation_tensors[1]] = \
            shuffle_in_unison_inplace(validation_tensors[0],
                                      validation_tensors[1])
    '''

    def unpickle(file):
        import cPickle
        with open(file, 'rb') as fo:
            return cPickle.load(fo)

    batch1 = unpickle('/home/paul/cifar-10-batches-py/data_batch_1')
    batch2 = unpickle('/home/paul/cifar-10-batches-py/data_batch_2')
    batch3 = unpickle('/home/paul/cifar-10-batches-py/data_batch_3')
    batch4 = unpickle('/home/paul/cifar-10-batches-py/data_batch_4')
    batch5 = unpickle('/home/paul/cifar-10-batches-py/data_batch_5')

    training_tensors = [
        numpy.concatenate((batch1['data'].reshape(10000, 3, 32, 32),
                           batch2['data'].reshape(10000, 3, 32, 32),
                           batch3['data'].reshape(10000, 3, 32, 32),
                           batch4['data'].reshape(10000, 3, 32, 32))),
        numpy.concatenate((batch1['labels'],
                           batch2['labels'],
                           batch3['labels'],
                           batch4['labels']))]

    validation_tensors = [
        batch5['data'].reshape(10000, 3, 32, 32),
        numpy.asarray(batch5['labels'])]

    shuffle_dataset = True
    if shuffle_dataset:
        def shuffle_in_unison_inplace(a, b):
            assert len(a) == len(b)
            p = numpy.random.permutation(len(a))
            return a[p], b[p]

        [training_tensors[0], training_tensors[1]] = \
            shuffle_in_unison_inplace(training_tensors[0],
                                      training_tensors[1])
        [validation_tensors[0], validation_tensors[1]] = \
            shuffle_in_unison_inplace(validation_tensors[0],
                                      validation_tensors[1])

    cifar10_training = Dataset(
        tensors=training_tensors,
        names=('images', 'labels'),
        formats=(DenseFormat(axes=('b', 'c', '0', '1'),
                             shape=(-1, 3, 32, 32),
                             dtype='uint8'),
                 DenseFormat(axes=('b',),
                             shape=(-1,),
                             dtype='int64')))

    cifar10_validation = Dataset(
        tensors=validation_tensors,
        names=('images', 'labels'),
        formats=(DenseFormat(axes=('b', 'c', '0', '1'),
                             shape=(-1, 3, 32, 32),
                             dtype='uint8'),
                 DenseFormat(axes=('b',),
                             shape=(-1,),
                             dtype='int64')))

    cifar10_validation_iterator = cifar10_validation.iterator(
        iterator_type='sequential',
        batch_size=args.batch_size)

    image_uint8_node, label_node = \
        cifar10_validation_iterator.make_input_nodes()
    image_node = CastNode(image_uint8_node, 'floatX')
    image_node_lcn = Lcn(image_node)
    # image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(3447523)
    theano_rng = RandomStreams(2387345)

    (affine_nodes, output_node) = build_fc_classifier(
        image_node_lcn,
        sizes,
        sparse_init_counts,
        args.dropout_include_rates,
        rng,
        theano_rng)

    loss_node = CrossEntropy(output_node, label_node)
    loss_sum = loss_node.output_symbol.mean()
    max_epochs = 10000

    #
    # Makes parameter updaters
    #

    parameters = []
    parameter_updaters = []
    momentum_updaters = []
    for affine_node in affine_nodes:
        for params in (affine_node.linear_node.params,
                       affine_node.bias_node.params):
            parameters.append(params)
            gradients = theano.gradient.grad(loss_sum, params)
            parameter_updater = SgdParameterUpdater(params,
                                                    gradients,
                                                    args.learning_rate,
                                                    args.initial_momentum,
                                                    args.nesterov)
            parameter_updaters.append(parameter_updater)

            momentum_updaters.append(LinearlyInterpolatesOverEpochs(
                parameter_updater.momentum,
                args.final_momentum,
                args.epochs_to_momentum_saturation))

    #
    # Makes batch and epoch callbacks
    #

    def make_output_basename(args):
        assert_equal(os.path.splitext(args.output_prefix)[1], "")
        if os.path.isdir(args.output_prefix) and \
           not args.output_prefix.endswith('/'):
            args.output_prefix += '/'

        output_dir, output_prefix = os.path.split(args.output_prefix)
        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return "{}lr-{}_mom-{}_nesterov-{}_bs-{}".format(
            output_prefix,
            args.learning_rate,
            args.initial_momentum,
            args.nesterov,
            args.batch_size)

    epoch_logger = EpochLogger(make_output_basename(args) + "_log.h5")

    # misclassification_node = Misclassification(output_node, label_node)
    # mcr_logger = LogsToLists()
    # training_stopper = StopsOnStagnation(max_epochs=10,
    #                                      min_proportional_decrease=0.0)

    misclassification_node = Misclassification(output_node, label_node)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to('validation mean loss', validation_loss_monitor)

    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node,
        callbacks=[print_mcr,
                   StopsOnStagnation(max_epochs=10,
                                     min_proportional_decrease=0.0)])

    epoch_logger.subscribe_to('validation misclassification',
                              validation_misclassification_monitor)

    # batch callback (monitor)
    # training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node, callbacks=[print_loss])
    epoch_logger.subscribe_to('training mean loss', training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                                       callbacks=[])
    epoch_logger.subscribe_to('training misclassification %',
                              training_misclassification_monitor)

    # epoch callbacks
    # validation_loss_logger = LogsToLists()

    def make_output_filename(args, best=False):
        basename = make_output_basename(args)
        return "{}{}.pkl".format(basename, '_best' if best else "")

    model = SerializableModel([image_uint8_node], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    validation_loss_monitor = MeanOverEpoch(loss_node,
                                            callbacks=[saves_best])
    epoch_logger.subscribe_to('validation loss', validation_loss_monitor)

    validation_callback = ValidationCallback(
        inputs=[image_uint8_node.output_symbol, label_node.output_symbol],
        input_iterator=cifar10_validation_iterator,
        epoch_callbacks=[validation_loss_monitor,
                         validation_misclassification_monitor])

    trainer = Sgd([image_uint8_node, label_node],
                  cifar10_training.iterator(iterator_type='sequential',
                                            batch_size=args.batch_size),
                  callbacks=(parameter_updaters +
                             momentum_updaters +
                             [training_loss_monitor,
                              training_misclassification_monitor,
                              validation_callback,
                              LimitsNumEpochs(max_epochs)]))
    #                         validation_loss_monitor]))

    # stuff_to_pickle = OrderedDict(
    #     (('model', model),
    #      ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    # trainer.epoch_callbacks += (momentum_updaters +
    #                             [PicklesOnEpoch(stuff_to_pickle,
    #                                             make_output_filename(args),
    #                                             overwrite=False),
    #                              validation_callback,
    #                              LimitsNumEpochs(max_epochs)])

    trainer.train()
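# Numpy sketch of the scalar that the CrossEntropy node followed by .mean() is
# taken to compute in the scripts above (illustrative; not the node's symbolic
# implementation): the average negative log-probability assigned to the
# correct integer labels, given softmax outputs of shape (batch, num_classes).
def mean_cross_entropy(softmax_outputs, int_labels):
    probs = softmax_outputs[numpy.arange(len(int_labels)), int_labels]
    return -numpy.mean(numpy.log(probs))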