misclassification_rate_monitor = MeanOverEpoch(misclassification_node,
                                               callbacks=[training_stopper])
# epoch_logger.subscribe_to('validation misclassification',
#                           validation_misclassification_monitor)

# Gets called by the trainer between training epochs.
validation_callback = ValidationCallback(
    inputs=[input_indices_symbolic.output_symbol],
    input_iterator=mnist_validation_iterator,
    epoch_callbacks=[misclassification_rate_monitor])

trainer = Bgfs(inputs=[input_indices_symbolic],
               parameters=params_flat,
               old_parameters=params_old_flat,
               gradient=gradient_symbol,
               learning_rate=.3,
               training_iterator=mnist_training_iterator,
               training_set=indices_training_dataset,
               scalar_loss=scalar_loss_symbol,
               epoch_callbacks=[
                   # Measure the validation misclassification rate; quit if
                   # it stops falling.
                   validation_callback,
                   LimitsNumEpochs(1000),  # perform no more than 1000 epochs
                   EpochTimer()])

start_time = time.time()
_classification_errors = trainer.train()
elapsed_time = time.time() - start_time

plt.plot(_classification_errors)
plt.show()

print("Time elapsed during training:", elapsed_time)
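# ----------------------------------------------------------------------------
# Aside: the early-stopping pattern used above (a StopsOnStagnation callback
# fed by a per-epoch validation monitor) boils down to the following minimal,
# framework-agnostic sketch. The names `train_one_epoch` and
# `compute_validation_error` are hypothetical stand-ins, not library APIs.
# ----------------------------------------------------------------------------
def train_with_early_stopping(train_one_epoch,
                              compute_validation_error,
                              max_epochs=1000,
                              patience=100,
                              min_proportional_decrease=0.0):
    best_error = float('inf')
    epochs_since_improvement = 0
    errors = []

    for _ in range(max_epochs):
        train_one_epoch()
        error = compute_validation_error()
        errors.append(error)

        # An epoch counts as an improvement only if it beats the best error
        # by more than the required proportional margin (0.0 means any
        # decrease at all counts).
        if error < best_error * (1.0 - min_proportional_decrease):
            best_error = error
            epochs_since_improvement = 0
        else:
            epochs_since_improvement += 1

        if epochs_since_improvement >= patience:
            break  # validation error has stagnated; stop training

    return errors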
validation_iter = validation_set.iterator(iterator_type="sequential",
                                          batch_size=100)

# Gets called by the trainer between training epochs.
validation_callback = ValidationCallback(
    inputs=[image_node.output_symbol, label_node.output_symbol],
    input_iterator=validation_iter,
    monitors=[misclassification_rate_monitor],
)

trainer = Bgfs(
    inputs=[image_node, label_node],
    parameters=params_flat,
    old_parameters=params_old_flat,
    gradient=gradient_symbol,
    gradient_at_old_params=gradient_symbol_old_params,
    learning_rate=0.3,
    training_iterator=training_iter,
    monitors=[],
    epoch_callbacks=[
        # Measure the validation misclassification rate; quit if it stops
        # falling.
        validation_callback,
        LimitsNumEpochs(100),  # perform no more than 100 epochs
    ],
)

start_time = time.time()
_classification_errors = trainer.train()
print(_classification_errors)
elapsed_time = time.time() - start_time

plt.plot(_classification_errors)
plt.show()
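# ----------------------------------------------------------------------------
# Aside: the Bgfs trainer above tracks both current and old parameters (and,
# in this variant, the gradient at the old parameters) because BFGS-style
# methods update an inverse-Hessian approximation from the step
# s = params - old_params and the gradient change y = grad - old_grad.
# A minimal numpy sketch of that update (a hypothetical helper, not the
# library's implementation):
# ----------------------------------------------------------------------------
import numpy

def bfgs_inverse_hessian_update(H, s, y):
    # Standard BFGS update:
    #   H' = (I - rho * s y^T) H (I - rho * y s^T) + rho * s s^T,
    # where rho = 1 / (y^T s).
    rho = 1.0 / y.dot(s)
    identity = numpy.eye(len(s))
    V = identity - rho * numpy.outer(s, y)
    return V.dot(H).dot(V.T) + rho * numpy.outer(s, s)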
def main():
    '''
    Entry point of this script.
    '''
    args = parse_args()

    # Hyperparameter values taken from Pylearn2:
    # In pylearn2/scripts/tutorials/convolutional_network/:
    # convolutional_network.ipynb

    filter_counts = [64, 64]
    filter_init_uniform_ranges = [.05] * len(filter_counts)
    filter_shapes = [(5, 5), (5, 5)]
    pool_shapes = [(4, 4), (4, 4)]
    pool_strides = [(2, 2), (2, 2)]
    affine_output_sizes = [10]
    affine_init_stddevs = [.05] * len(affine_output_sizes)
    dropout_include_rates = ([.5 if args.dropout else 1.0] *
                             (len(filter_counts) + len(affine_output_sizes)))

    assert_equal(affine_output_sizes[-1], 10)

    mnist_training, mnist_testing = load_mnist()

    if args.validation_size == 0:
        # Use the testing set as the validation set.
        mnist_validation = mnist_testing
    else:
        # Split the training set into training and validation sets.
        tensors = mnist_training.tensors
        training_tensors = [t[:-args.validation_size, ...] for t in tensors]
        validation_tensors = [t[-args.validation_size:, ...] for t in tensors]

        if args.shuffle_dataset:
            def shuffle_in_unison_inplace(a, b):
                # Shuffle two arrays with the same permutation, so that
                # images and labels stay aligned. (Despite the name, this
                # returns shuffled copies rather than shuffling in place.)
                assert len(a) == len(b)
                p = numpy.random.permutation(len(a))
                return a[p], b[p]

            training_tensors[0], training_tensors[1] = \
                shuffle_in_unison_inplace(training_tensors[0],
                                          training_tensors[1])
            validation_tensors[0], validation_tensors[1] = \
                shuffle_in_unison_inplace(validation_tensors[0],
                                          validation_tensors[1])

        mnist_training = Dataset(tensors=training_tensors,
                                 names=mnist_training.names,
                                 formats=mnist_training.formats)
        mnist_validation = Dataset(tensors=validation_tensors,
                                   names=mnist_training.names,
                                   formats=mnist_training.formats)

    mnist_validation_iterator = mnist_validation.iterator(
        iterator_type='sequential',
        loop_style='divisible',
        batch_size=args.batch_size)
    mnist_training_iterator = mnist_training.iterator(
        iterator_type='sequential',
        loop_style='divisible',
        batch_size=args.batch_size)

    image_uint8_node, label_node = mnist_validation_iterator.make_input_nodes()
    image_node = RescaleImage(image_uint8_node)

    rng = numpy.random.RandomState(129734)
    theano_rng = RandomStreams(2387845)

    (conv_layers,
     affine_layers,
     output_node,
     params_flat,
     params_old_flat) = build_conv_classifier(image_node,
                                              filter_shapes,
                                              filter_counts,
                                              filter_init_uniform_ranges,
                                              pool_shapes,
                                              pool_strides,
                                              affine_output_sizes,
                                              affine_init_stddevs,
                                              dropout_include_rates,
                                              rng,
                                              theano_rng)

    loss_node = CrossEntropy(output_node, label_node)
    scalar_loss = loss_node.output_symbol.mean()
    # scalar_loss2 = theano.clone(scalar_loss,
    #                             replace={params_flat: params_old_flat})

    if args.weight_decay != 0.0:
        for conv_layer in conv_layers:
            filters = conv_layer.conv2d_node.filters
            filter_loss = args.weight_decay * theano.tensor.sqr(filters).sum()
            scalar_loss = scalar_loss + filter_loss

        for affine_layer in affine_layers:
            weights = affine_layer.affine_node.linear_node.params
            weight_loss = args.weight_decay * theano.tensor.sqr(weights).sum()
            scalar_loss = scalar_loss + weight_loss

    max_epochs = 500

    #
    # Makes the parameter updater.
    #

    gradient = theano.gradient.grad(scalar_loss, params_flat)

    loss_function = theano.function(
        [image_uint8_node.output_symbol, label_node.output_symbol],
        scalar_loss)
    gradient_function = theano.function(
        [image_uint8_node.output_symbol, label_node.output_symbol],
        gradient)

    # Sanity check: evaluate the loss and gradient on one training batch.
    cost_arguments = mnist_training_iterator.next()
    print(loss_function(*cost_arguments))
    grads = gradient_function(*cost_arguments)
    print(grads)
    print(grads.shape)

    #
    # Makes batch and epoch callbacks.
    #

    def make_output_filename(args, best=False):
        '''
        Constructs a filename that reflects the command-line params.
        '''
        assert_equal(os.path.splitext(args.output_prefix)[1], "")

        if os.path.isdir(args.output_prefix):
            output_dir, output_prefix = args.output_prefix, ""
        else:
            output_dir, output_prefix = os.path.split(args.output_prefix)
            assert_true(os.path.isdir(output_dir))

        if output_prefix != "":
            output_prefix = output_prefix + "_"

        output_prefix = os.path.join(output_dir, output_prefix)

        return ("%slr-%g_mom-%g_nesterov-%s_bs-%d%s.pkl" %
                (output_prefix,
                 args.learning_rate,
                 args.initial_momentum,
                 not args.no_nesterov,
                 args.batch_size,
                 "_best" if best else ""))

    # Set up the loggers.
    epoch_logger = EpochLogger(make_output_filename(args) + "_log.h5")

    misclassification_node = Misclassification(output_node, label_node)

    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[])
    epoch_logger.subscribe_to('validation mean loss', validation_loss_monitor)

    training_stopper = StopsOnStagnation(max_epochs=100,
                                         min_proportional_decrease=0.0)
    validation_misclassification_monitor = MeanOverEpoch(
        misclassification_node,
        callbacks=[print_misclassification_rate, training_stopper])
    epoch_logger.subscribe_to('validation misclassification',
                              validation_misclassification_monitor)

    # Batch callback (monitor).
    # training_loss_logger = LogsToLists()
    training_loss_monitor = MeanOverEpoch(loss_node, callbacks=[print_loss])
    epoch_logger.subscribe_to("training loss", training_loss_monitor)

    training_misclassification_monitor = MeanOverEpoch(misclassification_node,
                                                       callbacks=[])
    epoch_logger.subscribe_to('training misclassification %',
                              training_misclassification_monitor)

    epoch_timer = EpochTimer()
    # epoch_logger.subscribe_to('epoch time', epoch_timer)

    #################

    model = SerializableModel([image_uint8_node], [output_node])
    saves_best = SavesAtMinimum(model, make_output_filename(args, best=True))

    # Rebind validation_loss_monitor so that it also saves the best model.
    validation_loss_monitor = MeanOverEpoch(loss_node, callbacks=[saves_best])
    epoch_logger.subscribe_to("Validation Loss", validation_loss_monitor)

    validation_callback = ValidationCallback(
        inputs=[image_uint8_node.output_symbol, label_node.output_symbol],
        input_iterator=mnist_validation_iterator,
        epoch_callbacks=[validation_loss_monitor,
                         validation_misclassification_monitor])

    # trainer = Sgd((image_node.output_symbol, label_node.output_symbol),
    trainer = Bgfs(inputs=[image_node, label_node],
                   parameters=params_flat,
                   old_parameters=params_old_flat,
                   gradient=gradient,
                   learning_rate=args.learning_rate,
                   training_iterator=mnist_training_iterator,
                   training_set=mnist_training,
                   scalar_loss=scalar_loss,
                   epoch_callbacks=[
                       # training_loss_monitor,
                       # training_misclassification_monitor,
                       validation_callback,
                       LimitsNumEpochs(max_epochs),
                       EpochTimer()])

    '''
    stuff_to_pickle = OrderedDict(
        (('model', model),
         ('validation_loss_logger', validation_loss_logger)))

    # Pickling the trainer doesn't work when there are Dropout nodes.
    # stuff_to_pickle = OrderedDict(
    #     (('trainer', trainer),
    #      ('validation_loss_logger', validation_loss_logger),
    #      ('model', model)))

    trainer.epoch_callbacks += (momentum_updaters +
                                [EpochTimer(),
                                 PicklesOnEpoch(stuff_to_pickle,
                                                make_output_filename(args),
                                                overwrite=False),
                                 validation_callback,
                                 LimitsNumEpochs(max_epochs)])
    '''

    start_time = time.time()
    trainer.train()
    elapsed_time = time.time() - start_time

    print("Total elapsed time for training is:", elapsed_time)
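# ----------------------------------------------------------------------------
# Aside: the weight-decay terms added to scalar_loss in main() are plain L2
# regularization: for each filter bank / weight matrix W, the penalty
# weight_decay * sum(W ** 2) is added to the mean cross-entropy. A minimal
# numpy sketch of the same penalty (a hypothetical stand-in for the symbolic
# Theano expression above):
# ----------------------------------------------------------------------------
import numpy

def l2_penalty(weight_arrays, weight_decay):
    # Numeric equivalent of summing weight_decay * theano.tensor.sqr(W).sum()
    # over every parameter array W.
    return weight_decay * sum((numpy.asarray(w) ** 2).sum()
                              for w in weight_arrays)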