def run(initializer, model_path):
    model = NeuralClassifier(input_dim=28 * 28)
    for _ in range(6):
        model.stack(Dense(128, 'relu', init=initializer))
    model.stack(Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)
    annealer = LearningRateAnnealer(trainer)

    mnist = MiniBatches(MnistDataset(), batch_size=20)
    trainer.run(mnist, controllers=[annealer])

    model.save_params(model_path)
def _initialize_impl(self, X, y=None):
    assert not self.is_initialized,\
        "This neural network has already been initialized."

    self._create_specs(X, y)
    self._create_mlp()
    if y is None:
        return

    if self.valid_size > 0.0:
        assert self.valid_set is None, "Can't specify valid_size and valid_set together."
        X, X_v, y, y_v = sklearn.cross_validation.train_test_split(
                            X, y,
                            test_size=self.valid_size,
                            random_state=self.random_state)
        self.valid_set = X_v, y_v
    self.train_set = X, y

    self.trainer = MomentumTrainer(self.mlp)
    self.controllers = [
        self,
        LearningRateAnnealer(self.trainer, patience=self.n_stable, anneal_times=0)]
def _initialize(self, X, y=None):
    assert not self.is_initialized,\
        "This neural network has already been initialized."

    self._create_specs(X, y)
    self._create_mlp()
    if y is None:
        return

    if self.valid_size > 0.0:
        assert self.valid_set is None, "Can't specify valid_size and valid_set together."
        X, X_v, y, y_v = sklearn.cross_validation.train_test_split(
                            X, y,
                            test_size=self.valid_size,
                            random_state=self.random_state)
        self.valid_set = X_v, y_v
    self.train_set = X, y

    self.trainer = MomentumTrainer(self.mlp)
    self.controllers = [
        self,
        LearningRateAnnealer(self.trainer, patience=self.n_stable, anneal_times=0)]
expanded_train_set = []

for img, label in mnist.train_set():
    expanded_train_set.append((img, label))
    original_img = (img * 256).reshape((28, 28))
    transformed_img = (elastic_distortion(original_img) / 256).flatten()
    expanded_train_set.append((transformed_img, label))

global_rand.shuffle(expanded_train_set)

expanded_mnist = BasicDataset(train=expanded_train_set,
                              valid=mnist.valid_set(),
                              test=mnist.test_set())

logging.info("expanded training data size: %d" % len(expanded_train_set))

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'relu'),
                Dense(256, 'relu'),
                Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)
    annealer = LearningRateAnnealer()

    mnist = MiniBatches(expanded_mnist, batch_size=20)
    trainer.run(mnist, controllers=[annealer])

    model.save_params(default_model)
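# NOTE: the expansion script above relies on an `elastic_distortion` helper that is
# not shown here. The sketch below is one possible implementation, following the
# random displacement field idea of Simard et al. (2003); the scipy-based helper,
# its name, and the `alpha`/`sigma` defaults are assumptions, not part of the original code.
import numpy as np
from scipy.ndimage import gaussian_filter, map_coordinates

def elastic_distortion(image, alpha=36.0, sigma=6.0, rng=np.random):
    """Warp a 2D image with a smoothed random displacement field."""
    # Random unit displacements, smoothed with a Gaussian and scaled by alpha.
    dx = gaussian_filter(rng.uniform(-1, 1, image.shape), sigma) * alpha
    dy = gaussian_filter(rng.uniform(-1, 1, image.shape), sigma) * alpha
    # Resample the image at the displaced coordinates (bilinear interpolation).
    ys, xs = np.meshgrid(np.arange(image.shape[0]),
                         np.arange(image.shape[1]), indexing='ij')
    return map_coordinates(image, [ys + dy, xs + dx], order=1, mode='reflect')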
default_model = os.path.join(os.path.dirname(__file__), "models", "deep_conv.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(# Reshape to 3D tensor
                Reshape((-1, 28, 28)),
                # Add a new dimension for convolution
                DimShuffle((0, 'x', 1, 2)),
                Convolution((4, 1, 5, 5), activation="relu"),
                Dropout(0.15),
                Convolution((8, 4, 5, 5), activation="relu"),
                Dropout(0.1),
                Convolution((16, 8, 3, 3), activation="relu"),
                Flatten(),
                Dropout(0.1),
                # As dimension information was lost, reveal it to the pipeline
                RevealDimension(16),
                Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)
    annealer = LearningRateAnnealer()

    mnist = MiniBatches(MnistDataset(), batch_size=20)
    trainer.run(mnist, controllers=[annealer])

    model.save_params(default_model)
"""
Classify MNIST digits using a very deep thin network.

Plain deep networks are very hard to train, as shown in this case.

Note, however, that if highway layers only learn to pass information forward,
in other words, to act as transparent layers, they would be meaningless.
"""
import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

model_path = os.path.join(os.path.dirname(__file__), "models", "baseline1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    for _ in range(20):
        model.stack(Dense(71, 'relu'))
    model.stack(Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)

    mnist = MiniBatches(MnistDataset(), batch_size=20)
    trainer.run(mnist, controllers=[LearningRateAnnealer()])

    model.save_params(model_path)
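# The docstring above contrasts this plain deep stack with highway layers. For
# reference, a highway layer mixes a transformed signal H(x) with the raw input x
# through a learned gate T(x): y = H(x) * T(x) + x * (1 - T(x)). The NumPy sketch
# below is illustrative only and is not part of deepy's API.
import numpy as np

def highway_layer(x, W_h, b_h, W_t, b_t):
    """y = H(x) * T(x) + x * (1 - T(x)); when T(x) is near 0 the layer is transparent."""
    H = np.maximum(0.0, x.dot(W_h) + b_h)            # transform path (ReLU)
    T = 1.0 / (1.0 + np.exp(-(x.dot(W_t) + b_t)))    # gate path (sigmoid)
    return H * T + x * (1.0 - T)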
        decoding_output = self.decoder.output(internal_variable)
        classification_output = self.classifier.output(internal_variable)

        auto_encoder_cost = AutoEncoderCost(decoding_output, x).get()
        classification_cost = CrossEntropyCost(classification_output, self.target_input).get()

        final_cost = 0.01 * auto_encoder_cost + classification_cost

        error_rate = ErrorRateCost(classification_output, self.target_input).get()

        self.register_monitors(("err", error_rate),
                               ("encoder_cost", auto_encoder_cost),
                               ("classify_cost", classification_cost))

        return final_cost

if __name__ == '__main__':
    model = BasicNetwork(input_dim=28 * 28, model=MyJointTrainingModel())

    mnist = MiniBatches(MnistDataset(), batch_size=20)
    trainer = MomentumTrainer(model, {'weight_l2': 0.0001})
    trainer.run(mnist, controllers=[LearningRateAnnealer(trainer)])

    model.save_params(model_path)
L2NORM_LIMIT = 1.9365
EPSILON = 1e-7

def clip_param_norm():
    for param in model.parameters:
        if param.name.startswith("W"):
            l2_norms = np.sqrt(np.sum(param.get_value() ** 2, axis=0, keepdims=True))
            desired_norms = np.clip(l2_norms, 0, L2NORM_LIMIT)
            scale = (desired_norms + EPSILON) / (l2_norms + EPSILON)
            param.set_value(param.get_value() * scale)

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.training_callbacks.append(clip_param_norm)
    model.stack(Dropout(0.2),
                Maxout(240, num_pieces=5, init=UniformInitializer(.005)),
                Maxout(240, num_pieces=5, init=UniformInitializer(.005)),
                Dense(10, 'linear', init=UniformInitializer(.005)),
                Softmax())

    trainer = MomentumTrainer(model, {"learning_rate": shared_scalar(0.01),
                                      "momentum": 0.5})

    annealer = ExponentialLearningRateAnnealer(trainer, debug=True)

    mnist = MiniBatches(MnistDataset(), batch_size=100)
    trainer.run(mnist, controllers=[annealer])

    model.save_params(default_model)
class MultiLayerPerceptron(NeuralNetwork):
    """
    Abstract base class for wrapping the multi-layer perceptron functionality from ``deepy``.
    """

    def _setup(self):
        self.iterations = 0
        self.trainer = None
        self.mlp = None

    @property
    def is_convolution(self):
        return False

    def _create_mlp_trainer(self, dataset):
        # Aggregate all the dropout parameters into shared dictionaries.
        dropout_probs, dropout_scales = {}, {}
        for l in [l for l in self.layers if l.dropout is not None]:
            incl = 1.0 - l.dropout
            dropout_probs[l.name] = incl
            dropout_scales[l.name] = 1.0 / incl
        assert len(dropout_probs) == 0 or self.regularize in ('dropout', None)

        if self.regularize == 'dropout' or len(dropout_probs) > 0:
            # Use the globally specified dropout rate when there are no layer-specific ones.
            incl = 1.0 - (self.dropout_rate or 0.5)
            default_prob, default_scale = incl, 1.0 / incl
            self.regularize = 'dropout'

            # Pass all the parameters to pylearn2 as a custom cost function.
            self.cost = dropout.Dropout(
                default_input_include_prob=default_prob,
                default_input_scale=default_scale,
                input_include_probs=dropout_probs, input_scales=dropout_scales)

        # Aggregate all regularization parameters into common dictionaries.
        layer_decay = {}
        if self.regularize in ('L1', 'L2') or any(l.weight_decay for l in self.layers):
            wd = self.weight_decay or 0.0001
            for l in self.layers:
                layer_decay[l.name] = l.weight_decay or wd
        assert len(layer_decay) == 0 or self.regularize in ('L1', 'L2', None)

        if len(layer_decay) > 0:
            mlp_default_cost = self.mlp.get_default_cost()
            if self.regularize == 'L1':
                l1 = mlp_cost.L1WeightDecay(layer_decay)
                self.cost = cost.SumOfCosts([mlp_default_cost, l1])
            else:  # Default is 'L2'.
                self.regularize = 'L2'
                l2 = mlp_cost.WeightDecay(layer_decay)
                self.cost = cost.SumOfCosts([mlp_default_cost, l2])

        return self._create_trainer(dataset, self.cost)

    def _create_mlp(self):
        model = NeuralRegressor(input_dim=self.unit_counts[0])
        for l, n in zip(self.layers, self.unit_counts[1:]):
            t = 'relu'
            if l.type == 'Rectifier':
                t = 'relu'
            if l.type == 'Linear':
                t = 'linear'
            model.stack_layer(Dense(n, t))
        model.stack_layer(Softmax())
        self.mlp = model

    def _initialize(self, X, y=None):
        assert not self.is_initialized,\
            "This neural network has already been initialized."

        self._create_specs(X, y)
        self._create_mlp()
        if y is None:
            return

        if self.valid_size > 0.0:
            assert self.valid_set is None, "Can't specify valid_size and valid_set together."
            X, X_v, y, y_v = sklearn.cross_validation.train_test_split(
                                X, y,
                                test_size=self.valid_size,
                                random_state=self.random_state)
            self.valid_set = X_v, y_v
        self.train_set = X, y

        self.trainer = MomentumTrainer(self.mlp)
        self.controllers = [
            self,
            LearningRateAnnealer(self.trainer, patience=self.n_stable, anneal_times=0)]

    def invoke(self):
        """Controller interface for deepy's trainer.
        """
        self.iterations += 1
        return bool(self.iterations >= self.n_iter)

    @property
    def is_initialized(self):
        """Check if the neural network was setup already.
        """
        return self.trainer is not None

    def _reshape(self, X, y=None):
        # TODO: Common for all backends.
        if y is not None and y.ndim == 1:
            y = y.reshape((y.shape[0], 1))
        if self.is_convolution and X.ndim == 3:
            X = X.reshape((X.shape[0], X.shape[1], X.shape[2], 1))
        if self.is_convolution and X.ndim == 2:
            size = math.sqrt(X.shape[1])
            assert size.is_integer(),\
                "Input array is not in image shape, and could not assume a square."
            X = X.reshape((X.shape[0], int(size), int(size), 1))
        if not self.is_convolution and X.ndim > 2:
            X = X.reshape((X.shape[0], numpy.product(X.shape[1:])))
        return X, y

    def _train_impl(self, X, y):
        self.iterations = 0
        data = zip(X, y)
        self.dataset = SequentialDataset(data)
        minibatches = MiniBatches(self.dataset, batch_size=20)
        self.trainer.run(minibatches, controllers=self.controllers)
        return self

    def _predict_impl(self, X):
        return self.mlp.compute(X)

    def _mlp_to_array(self):
        return []

    def _array_to_mlp(self, array):
        pass
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

default_model = os.path.join(os.path.dirname(__file__), "models", "mlp1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'relu'),
                Dense(256, 'relu'),
                Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model, {"weight_l2": 0.001})
    annealer = LearningRateAnnealer(trainer)

    mnist = MiniBatches(MnistDataset(), batch_size=20)
    trainer.run(mnist, controllers=[annealer])

    model.save_params(default_model)
def clip_param_norm():
    for param in model.parameters:
        if param.name.startswith("W"):
            l2_norms = np.sqrt(np.sum(param.get_value() ** 2, axis=0, keepdims=True))
            desired_norms = np.clip(l2_norms, 0, L2NORM_LIMIT)
            scale = (desired_norms + EPSILON) / (l2_norms + EPSILON)
            param.set_value(param.get_value() * scale)

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.training_callbacks.append(clip_param_norm)
    model.stack(Dropout(0.2),
                Maxout(240, num_pieces=5, init=UniformInitializer(.005)),
                Maxout(240, num_pieces=5, init=UniformInitializer(.005)),
                Dense(10, 'linear', init=UniformInitializer(.005)),
                Softmax())

    trainer = MomentumTrainer(model, {"learning_rate": graph.shared(0.01),
                                      "momentum": 0.5})

    annealer = ExponentialLearningRateAnnealer(debug=True)

    mnist = MiniBatches(MnistDataset(), batch_size=100)
    trainer.run(mnist, epoch_controllers=[annealer])

    model.save_params(default_model)
import logging, os
logging.basicConfig(level=logging.INFO)

# MNIST multi-layer model with dropout.
from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax, Dropout
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

model_path = os.path.join(os.path.dirname(__file__), "models", "tutorial1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'relu'),
                Dropout(0.2),
                Dense(256, 'relu'),
                Dropout(0.2),
                Dense(10, 'linear'),
                Softmax())

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer = MomentumTrainer(model, {"learning_rate": LearningRateAnnealer.learning_rate(0.01)})
    annealer = LearningRateAnnealer(trainer)

    trainer.run(mnist, controllers=[annealer])

    model.save_params(model_path)
import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MiniBatches, BasicDataset
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax
from deepy.trainers import MomentumTrainer, LearningRateAnnealer
from SynthDataset import SynthDataset

model_path = os.path.join(os.path.dirname(__file__), "models", "model_10000_op.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=32 * 32)
    model.stack(Dense(400, 'tanh'),
                Dense(100, 'tanh'),
                Dense(3, 'linear'),
                Softmax())

    # trainer = MomentumTrainer(model, {"weight_l2": 0.01})
    trainer = MomentumTrainer(model, {"learning_rate": LearningRateAnnealer.learning_rate(0.001)})
    annealer = LearningRateAnnealer(trainer)

    mlp_synthDataSet = MiniBatches(SynthDataset())

    trainer.run(mlp_synthDataSet, controllers=[annealer])

    model.save_params(model_path)
class MultiLayerPerceptronBackend(BaseBackend):
    """
    Abstract base class for wrapping the multi-layer perceptron functionality from ``deepy``.
    """

    def __init__(self, spec):
        super(MultiLayerPerceptronBackend, self).__init__(spec)
        self.iterations = 0
        self.trainer = None
        self.mlp = None

        l = logging.getLogger('deepy')
        l.setLevel(logging.WARNING)

    @property
    def is_convolution(self):
        return False

    def _create_mlp(self):
        model = NeuralRegressor(input_dim=self.unit_counts[0])
        initializer = UniformInitializer(seed=self.random_state)

        if self.spec.is_convolution:
            model.stack_layer(layers.DimShuffle((0, 'x', 1, 2)))

        for l, n in zip(self.layers, self.unit_counts[1:]):
            t = None
            if l.type in ('Tanh', 'Sigmoid'):
                t = l.type.lower()
            if l.type in ('Rectifier',):
                t = 'relu'
            if l.type in ('Linear', 'Softmax'):
                t = 'linear'
            assert t is not None, "Unknown activation type `%s`." % l.type

            if isinstance(l, Layer):
                # self._check_layer(l, ['units'])
                model.stack_layer(layers.Dense(n, t, init=initializer))
                if l.type == 'Softmax':
                    model.stack_layer(layers.Softmax())

            if isinstance(l, layers.Convolution):
                # self._check_layer(l, ['channel', 'kernel_shape'])
                model.stack_layer(layers.Convolution(
                    activation=t,
                    filter_shape=(l.channels, l.kernel_shape[0], l.kernel_shape[1]),
                    pool_size=l.pool_shape,
                    border_mode=l.border_mode,
                    init=initializer))

        self.mlp = model

    def _initialize_impl(self, X, y=None):
        assert not self.is_initialized,\
            "This neural network has already been initialized."

        self._create_specs(X, y)
        self._create_mlp()
        if y is None:
            return

        if self.valid_size > 0.0:
            assert self.valid_set is None, "Can't specify valid_size and valid_set together."
            X, X_v, y, y_v = sklearn.cross_validation.train_test_split(
                                X, y,
                                test_size=self.valid_size,
                                random_state=self.random_state)
            self.valid_set = X_v, y_v
        self.train_set = X, y

        self.trainer = MomentumTrainer(self.mlp)
        self.controllers = [
            self,
            LearningRateAnnealer(self.trainer, patience=self.n_stable, anneal_times=0)]

    def invoke(self):
        """Controller interface for deepy's trainer.
        """
        self.iterations += 1
        return bool(self.iterations >= self.n_iter)

    @property
    def is_initialized(self):
        """Check if the neural network was setup already.
        """
        return self.trainer is not None

    def _train_impl(self, X, y):
        if self.spec.is_convolution:
            X = X.reshape(X.shape[:3])
        self.iterations = 0
        data = zip(X, y)
        self.dataset = SequentialDataset(data)
        minibatches = MiniBatches(self.dataset, batch_size=20)
        self.trainer.run(minibatches, controllers=self.controllers)
        return self

    def _predict_impl(self, X):
        return self.mlp.compute(X)

    def _mlp_to_array(self):
        return []

    def _array_to_mlp(self, array):
        pass