def get_model(state_num, action_num):
    model = NeuralRegressor(state_num)
    model.stack(
        Dense(HIDDEN_UNITS, activation='tanh', init=GaussianInitializer(deviation=0.01)),
        Dense(action_num, init=GaussianInitializer(deviation=0.01)))
    return model
def run(method, model_path):
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(128, 'relu'),
                Dense(128, 'relu'),
                Dense(10, 'linear'),
                Softmax())

    trainer = ScipyTrainer(model, method)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(MnistDataset(), batch_size=100)

    trainer.run(mnist, epoch_controllers=[annealer])

    model.save_params(model_path)
def run(initializer, model_path):
    model = NeuralClassifier(input_dim=28 * 28)
    for _ in range(6):
        model.stack(Dense(128, 'relu', init=initializer))
    model.stack(Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer(trainer)

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer.run(mnist, controllers=[annealer])

    model.save_params(model_path)
def prepare(self):
    # Output layers
    self.output_layer = Chain(self.input_dim).stack(
        Dense(self.output_size * self.class_size))
    self.softmax_layer = Softmax().initialize(input_dim=self.output_size)
    self.class_layer = Chain(self.input_dim).stack(
        Dense(self.class_size),
        Softmax3D())
    self.register_inner_layers(self.class_layer, self.output_layer)
    # Target tensor
    self.target_tensor = T.imatrix('target')
    self.register_external_targets(self.target_tensor)
    # arange cache
    self.arange_cache = theano.shared(np.arange(10 * 64), name="arange_cache")
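# The prepare() above appears to build a class-factored output layer: the vocabulary is split
# into classes, and P(word | h) = P(class(word) | h) * P(word | class(word), h).
# A minimal numpy sketch of that decomposition follows; the function and argument names are
# illustrative assumptions, not part of the layer above.
import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def class_factored_prob(class_logits, word_logits_per_class, class_id, word_in_class_id):
    """P(word | h) = P(class | h) * P(word-within-class | class, h)."""
    p_class = softmax(class_logits)[class_id]
    p_word_given_class = softmax(word_logits_per_class[class_id])[word_in_class_id]
    return p_class * p_word_given_class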
def setup(self):
    """
    All code that creates parameters should be put into the 'setup' function.
    """
    self.output_dim = 10

    self.encoder = Chain(self.input_dim).stack(
        Dense(self.internal_layer_size, 'tanh'))
    self.decoder = Chain(self.internal_layer_size).stack(
        Dense(self.input_dim))
    self.classifier = Chain(self.internal_layer_size).stack(
        Dense(50, 'tanh'),
        Dense(self.output_dim),
        Softmax())
    self.register_inner_layers(self.encoder, self.decoder, self.classifier)

    self.target_input = T.ivector('target')
    self.register_external_inputs(self.target_input)
""" An auto-encoder for compress MNIST images. """ import logging, os logging.basicConfig(level=logging.INFO) from deepy.dataset import MnistDataset, MiniBatches from deepy.networks import AutoEncoder from deepy.layers import Dense from deepy.trainers import SGDTrainer, LearningRateAnnealer from deepy.utils import shared_scalar model_path = os.path.join(os.path.dirname(__file__), "models", "mnist_autoencoder.gz") if __name__ == '__main__': model = AutoEncoder(input_dim=28 * 28, rep_dim=30) model.stack_encoders(Dense(50, 'tanh'), Dense(30)) model.stack_decoders(Dense(50, 'tanh'), Dense(28 * 28)) trainer = SGDTrainer(model, { 'learning_rate': shared_scalar(0.05), 'gradient_clipping': 3 }) mnist = MiniBatches(MnistDataset(for_autoencoder=True), batch_size=20) trainer.run(mnist, controllers=[LearningRateAnnealer(trainer)]) model.save_params(model_path)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax, Dropout, PRelu
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

default_model = os.path.join(os.path.dirname(__file__), "models", "mlp_prelu_dropout1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'linear'),
                PRelu(),
                Dropout(0.2),
                Dense(256, 'linear'),
                PRelu(),
                Dropout(0.2),
                Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer.run(mnist, controllers=[annealer])

    model.save_params(default_model)
logging.basicConfig(level=logging.INFO)

resource_dir = os.path.abspath(os.path.dirname(__file__)) + os.sep + "resources"
vocab_path = os.path.join(resource_dir, "ptb.train.txt")
train_path = os.path.join(resource_dir, "ptb.train.txt")
valid_path = os.path.join(resource_dir, "ptb.valid.txt")

vocab = Vocab(char_based=True)
vocab.load(vocab_path, max_size=1000)

model = NeuralLM(input_dim=vocab.size, input_tensor=3)
model.stack(
    RNN(hidden_size=100, output_type="sequence"),
    RNN(hidden_size=100, output_type="sequence"),
    Dense(vocab.size, "softmax"))

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default=os.path.join(os.path.dirname(__file__), "models", "char_rnn_model1.gz"))
    ap.add_argument("--sample", default="")
    args = ap.parse_args()

    if os.path.exists(args.model):
        model.load_params(args.model)

    lmdata = LMDataset(vocab, train_path, valid_path, history_len=30,
                       char_based=True, max_tokens=300)
    batch = SequentialMiniBatches(lmdata, batch_size=20)

    trainer = SGDTrainer(model)
expanded_train_set = []

for img, label in mnist.train_set():
    expanded_train_set.append((img, label))
    original_img = (img * 256).reshape((28, 28))
    transformed_img = (elastic_distortion(original_img) / 256).flatten()
    expanded_train_set.append((transformed_img, label))

env.numpy_rand.shuffle(expanded_train_set)

expanded_mnist = BasicDataset(train=expanded_train_set, valid=mnist.valid_set(), test=mnist.test_set())

logging.info("expanded training data size: %d" % len(expanded_train_set))

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'relu'),
                Dense(256, 'relu'),
                Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(expanded_mnist, batch_size=20)

    trainer.run(mnist, epoch_controllers=[annealer])

    model.save_params(default_model)
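# The elastic_distortion() helper used above is provided by this example's own utilities.
# As a rough illustration of the technique (Simard-style elastic deformation: a random
# displacement field smoothed with a Gaussian filter, then used to resample the image),
# a minimal scipy sketch might look like the following. The alpha/sigma values and the
# function name are assumptions for illustration, not the example's actual implementation.
import numpy as np
from scipy.ndimage import gaussian_filter, map_coordinates

def elastic_distortion_sketch(image, alpha=36.0, sigma=6.0, rng=np.random):
    """Resample a 2D image along a smoothed random displacement field."""
    dx = gaussian_filter(rng.uniform(-1, 1, image.shape), sigma) * alpha
    dy = gaussian_filter(rng.uniform(-1, 1, image.shape), sigma) * alpha
    ys, xs = np.meshgrid(np.arange(image.shape[0]), np.arange(image.shape[1]), indexing='ij')
    # Bilinear interpolation at the displaced coordinates
    return map_coordinates(image, [ys + dy, xs + dx], order=1, mode='reflect')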
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
This experiment setting is described in http://arxiv.org/pdf/1502.03167v3.pdf.

MNIST MLP baseline model.
The Gaussian initialization described in the paper did not converge in my runs; the reason is unclear.
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax
from deepy.trainers import SGDTrainer

default_model = os.path.join(os.path.dirname(__file__), "models", "baseline1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(100, 'sigmoid'),
                Dense(100, 'sigmoid'),
                Dense(100, 'sigmoid'),
                Dense(10, 'linear'),
                Softmax())

    trainer = SGDTrainer(model)

    batches = MiniBatches(MnistDataset(), batch_size=60)

    trainer.run(batches, controllers=[])

    model.save_params(default_model)
This experiment setting is described in http://arxiv.org/pdf/1502.03167v3.pdf.

MNIST MLP model with batch normalization.

In my experiment, the improvement on the validation data stopped after 37 epochs.
(See models/batch_norm1.log)
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax, BatchNormalization
from deepy.trainers import SGDTrainer

default_model = os.path.join(os.path.dirname(__file__), "models", "batch_norm1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(100, 'sigmoid'),
                BatchNormalization(),
                Dense(100, 'sigmoid'),
                BatchNormalization(),
                Dense(100, 'sigmoid'),
                BatchNormalization(),
                Dense(10, 'linear'),
                Softmax())

    trainer = SGDTrainer(model)

    batches = MiniBatches(MnistDataset(), batch_size=60)

    trainer.run(batches, controllers=[])

    model.save_params(default_model)
Classify MNIST digits using a very deep thin network.

Plain deep networks are very hard to train, as this case shows.

Note that if highway layers only learned to pass information forward, in other words,
to act as transparent layers, they would be meaningless.
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

model_path = os.path.join(os.path.dirname(__file__), "models", "baseline1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    for _ in range(20):
        model.stack(Dense(71, 'relu'))
    model.stack(Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer.run(mnist, controllers=[LearningRateAnnealer()])

    model.save_params(model_path)
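# The highway-layer remark in the docstring above refers to the standard gating equation
# y = T(x) * H(x) + (1 - T(x)) * x: when the transform gate T(x) stays near zero the layer
# is purely "transparent" and contributes nothing. A minimal numpy sketch of one such step
# (names, activation choices, and shapes are illustrative assumptions, not this example's code):
import numpy as np

def highway_step(x, W_h, b_h, W_t, b_t):
    """One highway layer: blend a nonlinear transform H with the untouched input x."""
    H = np.tanh(x.dot(W_h) + b_h)                         # candidate transform
    T_gate = 1.0 / (1.0 + np.exp(-(x.dot(W_t) + b_t)))    # sigmoid transform gate in (0, 1)
    return T_gate * H + (1.0 - T_gate) * x                # gated mixture of transform and carry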
L2NORM_LIMIT = 1.9365
EPSILON = 1e-7

def clip_param_norm():
    # Constrain the column-wise L2 norm of every weight matrix to L2NORM_LIMIT,
    # rescaling columns that exceed the limit after each update.
    for param in model.parameters:
        if param.name.startswith("W"):
            l2_norms = np.sqrt(np.sum(param.get_value() ** 2, axis=0, keepdims=True))
            desired_norms = np.clip(l2_norms, 0, L2NORM_LIMIT)
            scale = (desired_norms + EPSILON) / (l2_norms + EPSILON)
            param.set_value(param.get_value() * scale)

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.training_callbacks.append(clip_param_norm)
    model.stack(Dropout(0.2),
                Maxout(240, num_pieces=5, init=UniformInitializer(.005)),
                Maxout(240, num_pieces=5, init=UniformInitializer(.005)),
                Dense(10, 'linear', init=UniformInitializer(.005)),
                Softmax())

    trainer = MomentumTrainer(model, {"learning_rate": shared_scalar(0.01),
                                      "momentum": 0.5})

    annealer = ExponentialLearningRateAnnealer(trainer, debug=True)

    mnist = MiniBatches(MnistDataset(), batch_size=100)

    trainer.run(mnist, controllers=[annealer])

    model.save_params(default_model)
dataset = SequentialDataset(train_set, valid=valid_set)
dataset.report()

batch_set = MiniBatches(dataset, batch_size=32)

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default=os.path.join(os.path.dirname(__file__), "models", "sequence_adding_100_2.gz"))
    args = ap.parse_args()

    model = NeuralRegressor(input_dim=2, input_tensor=3)
    model.stack(IRNN(hidden_size=100, input_type="sequence", output_type="one"),
                Dense(1))

    if os.path.exists(args.model):
        model.load_params(args.model)

    conf = TrainerConfig()
    conf.learning_rate = LearningRateAnnealer.learning_rate(0.01)
    conf.gradient_clipping = 3
    conf.patience = 50
    conf.gradient_tolerance = 5
    conf.avoid_nan = False

    trainer = SGDTrainer(model, conf)

    annealer = LearningRateAnnealer(patience=20)

    trainer.run(batch_set, controllers=[annealer])
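# The train_set / valid_set used above are built elsewhere in this example. As a rough
# illustration of the sequence-adding task itself (each sample is a sequence of
# (random value, marker) pairs, and the target is the sum of the two marked values,
# which matches input_dim=2 and the single regression output), a sketch under those
# assumptions; the function name and defaults are illustrative, not the example's code:
import numpy as np

def make_adding_sample(length=100, rng=np.random):
    values = rng.uniform(0, 1, length)
    markers = np.zeros(length)
    markers[rng.choice(length, size=2, replace=False)] = 1.0
    x = np.stack([values, markers], axis=1)    # shape (length, 2): value and marker channels
    y = np.array([np.dot(values, markers)])    # target: sum of the two marked values
    return x, y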
def prepare(self):
    self.core = Chain(self.input_dim).stack(Dense(self.vocab_size),
                                            Softmax3D())
    self.register_inner_layers(self.core)
""" This experiment setting is described in http://arxiv.org/pdf/1502.03167v3.pdf. MNIST MLP baseline model. Gaussian initialization described in the paper did not convergence, I have no idea. """ import logging, os logging.basicConfig(level=logging.INFO) from deepy.dataset import MnistDataset, MiniBatches from deepy.networks import NeuralClassifier from deepy.layers import Dense, Softmax from deepy.trainers import SGDTrainer default_model = os.path.join(os.path.dirname(__file__), "models", "baseline1.gz") if __name__ == '__main__': model = NeuralClassifier(input_dim=28 * 28) model.stack(Dense(100, 'sigmoid'), Dense(100, 'sigmoid'), Dense(100, 'sigmoid'), Dense(10, 'linear'), Softmax()) trainer = SGDTrainer(model) batches = MiniBatches(MnistDataset(), batch_size=60) trainer.run(batches, epoch_controllers=[]) model.save_params(default_model)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

from deepy.networks import AutoEncoder
from deepy.layers import RNN, Dense
from deepy.trainers import SGDTrainer, LearningRateAnnealer
from util import get_data, VECTOR_SIZE, SEQUENCE_LENGTH

HIDDEN_SIZE = 50

model_path = os.path.join(os.path.dirname(__file__), "models", "rnn1.gz")

if __name__ == '__main__':
    model = AutoEncoder(input_dim=VECTOR_SIZE, input_tensor=3)
    model.stack_encoders(RNN(hidden_size=HIDDEN_SIZE, input_type="sequence", output_type="one"))
    model.stack_decoders(RNN(hidden_size=HIDDEN_SIZE, input_type="one", output_type="sequence", steps=SEQUENCE_LENGTH),
                         Dense(VECTOR_SIZE, 'softmax'))

    trainer = SGDTrainer(model)

    annealer = LearningRateAnnealer(trainer)

    trainer.run(get_data(), controllers=[annealer])

    model.save_params(model_path)
# -*- coding: utf-8 -*-

import os

from deepy.networks import AutoEncoder
from deepy.layers import RNN, Dense
from deepy.trainers import SGDTrainer, LearningRateAnnealer
from util import get_data, VECTOR_SIZE, SEQUENCE_LENGTH

HIDDEN_SIZE = 50

model_path = os.path.join(os.path.dirname(__file__), "models", "rnn1.gz")

if __name__ == '__main__':
    model = AutoEncoder(rep_dim=10, input_dim=VECTOR_SIZE, input_tensor=3)
    model.stack_encoders(
        RNN(hidden_size=HIDDEN_SIZE, input_type="sequence", output_type="one"))
    model.stack_decoders(
        RNN(hidden_size=HIDDEN_SIZE, input_type="one", output_type="sequence", steps=SEQUENCE_LENGTH),
        Dense(VECTOR_SIZE, 'softmax'))

    trainer = SGDTrainer(model)

    annealer = LearningRateAnnealer(trainer)

    trainer.run(get_data(), controllers=[annealer])

    model.save_params(model_path)
if __name__ == '__main__':
    dropout_p_0 = 0.2
    dropout_p_h_0 = 0.3
    dropout_p_h_1 = 0.3
    dropout_p_2 = 0.5
    T = 5
    n = 1024
    d = 256
    gate_bias = -1.0
    activation = 'relu'
    #l2_reg = 0.001
    l2_reg = 1e-5
    init = XavierGlorotInitializer()

    model = L2HingeNeuralClassifier(input_dim=28 * 28, last_layer_l2_regularization=l2_reg)
    model.stack(Dropout(p=dropout_p_0),
                Dense(n, init=init, disable_bias=True),
                BatchNormalization(),
                Activation(activation))
    #model.stack(Dropout(p=dropout_p_0), BatchNormalization())

    for _ in range(T):
        #model.stack(HighwayLayerLRDropoutBatchNorm(activation=activation, gate_bias=gate_bias, projection_dim=d, d_p_0=dropout_p_h_0, d_p_1=dropout_p_h_1, init=init))
        model.stack(
            HighwayLayerLRDiagDropoutBatchNorm(activation=activation, gate_bias=gate_bias, projection_dim=d,
                                               d_p_0=dropout_p_h_0, d_p_1=dropout_p_h_1, init=init,
                                               quasi_ortho_init=True))

    #model.stack(BatchNormalization(), Dropout(p=dropout_p_2), Dense(10, init=init))
    model.stack(Dropout(p=dropout_p_2),
                Dense(10, init=init))
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
For reference, this model should achieve a 1.50% error rate in about 10 minutes on an i7 CPU (8 threads).
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax, Dropout
from deepy.trainers import MomentumTrainer, LearningRateAnnealer

default_model = os.path.join(os.path.dirname(__file__), "models", "mlp_dropout1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'relu'),
                Dropout(0.5),
                Dense(256, 'relu'),
                Dropout(0.5),
                Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer.run(mnist, epoch_controllers=[annealer])

    model.save_params(default_model)
from deepy.layers import LSTM, Dense

logging.basicConfig(level=logging.INFO)

resource_dir = os.path.abspath(os.path.dirname(__file__)) + os.sep + "resources"
vocab_path = os.path.join(resource_dir, "ptb.train.txt")
train_path = os.path.join(resource_dir, "ptb.train.txt")
valid_path = os.path.join(resource_dir, "ptb.valid.txt")

vocab = Vocab(char_based=True)
vocab.load(vocab_path, max_size=1000)

model = NeuralLM(input_dim=vocab.size, input_tensor=3)
model.stack(LSTM(hidden_size=100, output_type="sequence"),
            Dense(vocab.size, activation="softmax"))

default_model = os.path.join(os.path.dirname(__file__), "models", "char_lstm_model1.gz")

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default=default_model)
    ap.add_argument("--sample", default="")
    args = ap.parse_args()

    if os.path.exists(args.model):
        model.load_params(args.model)

    lmdata = LMDataset(vocab, train_path,
default_model = os.path.join(os.path.dirname(__file__), "models", "deep_conv.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(
        # Reshape to 3D tensor
        Reshape((-1, 28, 28)),
        # Add a new dimension for convolution
        DimShuffle((0, 'x', 1, 2)),
        Convolution((4, 1, 5, 5), activation="relu"),
        Dropout(0.15),
        Convolution((8, 4, 5, 5), activation="relu"),
        Dropout(0.1),
        Convolution((16, 8, 3, 3), activation="relu"),
        Flatten(),
        Dropout(0.1),
        # As dimension information was lost, reveal it to the pipeline
        RevealDimension(16),
        Dense(10, 'linear'),
        Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer.run(mnist, controllers=[annealer])

    model.save_params(default_model)
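# A quick shape check for the convolution stack above, assuming 'valid'-mode convolutions and
# no pooling (an illustrative calculation only, not part of the original example): the feature
# maps shrink with each kernel, so the flattened vector fed to the final Dense layers has
# 16 * 18 * 18 entries.
def valid_conv_size(size, kernel):
    return size - kernel + 1

size = 28
for kernel in (5, 5, 3):
    size = valid_conv_size(size, kernel)   # 28 -> 24 -> 20 -> 18
print(size)                # 18
print(16 * size * size)    # flattened length if the last convolution outputs 16 channels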
batch_set = MiniBatches(dataset)

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=26, input_tensor=3)
    model.stack(
        RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.1),
        RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.3),
        RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.6),
        RNN(hidden_size=30, input_type="sequence", output_type="one", vector_core=0.9),
        Dense(4),
        Softmax())

    trainer = SGDTrainer(model)

    annealer = LearningRateAnnealer()

    trainer.run(batch_set.train_set(), batch_set.valid_set(), controllers=[annealer])