ap.add_argument("--model", default="") ap.add_argument("--small", action="store_true") args = ap.parse_args() vocab, lmdata = load_data(small=args.small, history_len=5, batch_size=64) import pdb pdb.set_trace() model = NeuralLM(vocab.size) model.stack( RNN(hidden_size=100, output_type="sequence", hidden_activation='sigmoid', persistent_state=True, batch_size=lmdata.size, reset_state_for_input=0), ClassOutputLayer(output_size=100, class_size=100)) if os.path.exists(args.model): model.load_params(args.model) trainer = SGDTrainer( model, { "learning_rate": LearningRateAnnealer.learning_rate(1.2), "weight_l2": 1e-7 }) annealer = LearningRateAnnealer() trainer.run(lmdata, epoch_controllers=[annealer]) model.save_params(default_model)
# -*- coding: utf-8 -*-

import os

from deepy.networks import AutoEncoder
from deepy.layers import RNN, Dense
from deepy.trainers import SGDTrainer, LearningRateAnnealer

from util import get_data, VECTOR_SIZE, SEQUENCE_LENGTH

HIDDEN_SIZE = 50

model_path = os.path.join(os.path.dirname(__file__), "models", "rnn1.gz")

if __name__ == '__main__':
    model = AutoEncoder(rep_dim=10, input_dim=VECTOR_SIZE, input_tensor=3)
    model.stack_encoders(RNN(hidden_size=HIDDEN_SIZE, input_type="sequence", output_type="one"))
    model.stack_decoders(RNN(hidden_size=HIDDEN_SIZE, input_type="one", output_type="sequence",
                             steps=SEQUENCE_LENGTH),
                         Dense(VECTOR_SIZE, 'softmax'))

    trainer = SGDTrainer(model)

    annealer = LearningRateAnnealer(trainer)

    trainer.run(get_data(), controllers=[annealer])

    model.save_params(model_path)
ap.add_argument("--model", default="") ap.add_argument("--small", action="store_true") args = ap.parse_args() vocab, lmdata = load_data(small=args.small, history_len=5, batch_size=64) import pdb pdb.set_trace() model = NeuralLM(vocab.size) model.stack( RNN(hidden_size=100, output_type="sequence", hidden_activation='sigmoid', persistent_state=True, batch_size=lmdata.size, reset_state_for_input=0), ClassOutputLayer(output_size=100, class_size=100)) if os.path.exists(args.model): model.load_params(args.model) trainer = SGDTrainer( model, { "learning_rate": LearningRateAnnealer.learning_rate(1.2), "weight_l2": 1e-7 }) annealer = LearningRateAnnealer() trainer.run(lmdata, controllers=[annealer]) model.save_params(default_model)
default_model = os.path.join(os.path.dirname(__file__), "models", "lstm_rnnlmnew.gz")

default_dict = '/home/tangyaohua/dl4mt/data/larger.corpus/vocab.chinese.pkl'
# default_dict = '/home/tangyh/Dropbox/PycharmProjects/dl4mt/session2/lm/resources/vocab.chinese.pkl'

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default='')
    ap.add_argument("--dictpath", default=default_dict)
    ap.add_argument("--small", action="store_true")
    args = ap.parse_args()

    vocab, lmdata = load_datagivendict(dictpath=args.dictpath, small=args.small,
                                       history_len=5, batch_size=16)

    inputx = T.imatrix('x')
    print len(vocab), 'len(vocab)'

    model = NeuralLM(len(vocab), test_data=None, input_tensor=inputx)
    model.stack(LSTM(hidden_size=100, output_type="sequence", persistent_state=True,
                     batch_size=lmdata.size, reset_state_for_input=0),
                FullOutputLayer(len(vocab)))

    if os.path.exists(args.model):
        model.load_params(args.model)

    trainer = SGDTrainer(model, {"learning_rate": LearningRateAnnealer.learning_rate(1.2),
                                 "weight_l2": 1e-7})
    annealer = LearningRateAnnealer(trainer)

    trainer.run(lmdata, controllers=[annealer])

    model.save_params(default_model)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

from deepy.networks import AutoEncoder
from deepy.layers import RNN, Dense
from deepy.trainers import SGDTrainer, LearningRateAnnealer

from util import get_data, VECTOR_SIZE, SEQUENCE_LENGTH

HIDDEN_SIZE = 50

model_path = os.path.join(os.path.dirname(__file__), "models", "rnn1.gz")

if __name__ == '__main__':
    model = AutoEncoder(input_dim=VECTOR_SIZE, input_tensor=3)
    model.stack_encoders(RNN(hidden_size=HIDDEN_SIZE, input_type="sequence", output_type="one"))
    model.stack_decoders(RNN(hidden_size=HIDDEN_SIZE, input_type="one", output_type="sequence",
                             steps=SEQUENCE_LENGTH),
                         Dense(VECTOR_SIZE, 'softmax'))

    trainer = SGDTrainer(model)

    annealer = LearningRateAnnealer(trainer)

    trainer.run(get_data(), controllers=[annealer])

    model.save_params(model_path)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
An auto-encoder for compressing MNIST images.
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import AutoEncoder
from deepy.layers import Dense
from deepy.trainers import SGDTrainer, LearningRateAnnealer
from deepy.utils import shared_scalar

model_path = os.path.join(os.path.dirname(__file__), "models", "mnist_autoencoder.gz")

if __name__ == '__main__':
    model = AutoEncoder(input_dim=28 * 28, rep_dim=30)
    model.stack_encoders(Dense(50, 'tanh'), Dense(30))
    model.stack_decoders(Dense(50, 'tanh'), Dense(28 * 28))

    trainer = SGDTrainer(model, {'learning_rate': shared_scalar(0.05),
                                 'gradient_clipping': 3})

    mnist = MiniBatches(MnistDataset(for_autoencoder=True), batch_size=20)

    trainer.run(mnist, controllers=[LearningRateAnnealer(trainer)])

    model.save_params(model_path)
""" An auto-encoder for compress MNIST images. """ import logging, os logging.basicConfig(level=logging.INFO) from deepy.dataset import MnistDataset, MiniBatches from deepy.networks import AutoEncoder from deepy.layers import Dense from deepy.trainers import SGDTrainer, LearningRateAnnealer from deepy.utils import shared_scalar model_path = os.path.join(os.path.dirname(__file__), "models", "mnist_autoencoder.gz") if __name__ == '__main__': model = AutoEncoder(input_dim=28 * 28, rep_dim=30) model.stack_encoders(Dense(50, 'tanh'), Dense(30)) model.stack_decoders(Dense(50, 'tanh'), Dense(28 * 28)) trainer = SGDTrainer(model, { 'learning_rate': shared_scalar(0.05), 'gradient_clipping': 3 }) mnist = MiniBatches(MnistDataset(for_autoencoder=True), batch_size=20) trainer.run(mnist, controllers=[LearningRateAnnealer(trainer)]) model.save_params(model_path)
model = NeuralLM(input_dim=vocab.size, input_tensor=3)
model.stack(IRNN(hidden_size=100, output_type="sequence"),
            Dense(vocab.size, "softmax"))

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default=os.path.join(os.path.dirname(__file__), "models", "char_irnn_model1.gz"))
    ap.add_argument("--sample", default="")
    args = ap.parse_args()

    if os.path.exists(args.model):
        model.load_params(args.model)

    lmdata = LMDataset(vocab, train_path, valid_path, history_len=30, char_based=True, max_tokens=300)
    batch = SequentialMiniBatches(lmdata, batch_size=20)

    trainer = SGDTrainer(model)
    annealer = LearningRateAnnealer(trainer)

    trainer.run(batch, controllers=[annealer])

    model.save_params(args.model)
train_path = os.path.join(resource_dir, "ptb.train.txt")
valid_path = os.path.join(resource_dir, "ptb.valid.txt")

vocab = Vocab(char_based=True)
vocab.load(vocab_path, max_size=1000)

model = NeuralLM(input_dim=vocab.size, input_tensor=3)
model.stack(RNN(hidden_size=100, output_type="sequence"),
            RNN(hidden_size=100, output_type="sequence"),
            Dense(vocab.size, "softmax"))

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default=os.path.join(os.path.dirname(__file__), "models", "char_rnn_model1.gz"))
    ap.add_argument("--sample", default="")
    args = ap.parse_args()

    if os.path.exists(args.model):
        model.load_params(args.model)

    lmdata = LMDataset(vocab, train_path, valid_path, history_len=30, char_based=True, max_tokens=300)
    batch = SequentialMiniBatches(lmdata, batch_size=20)

    trainer = SGDTrainer(model)
    annealer = LearningRateAnnealer()

    trainer.run(batch, epoch_controllers=[annealer])

    model.save_params(args.model)
In my experiment, the improvement on the validation data stopped after 37 epochs.
(See models/batch_norm1.log)
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax, BatchNormalization
from deepy.trainers import SGDTrainer

default_model = os.path.join(os.path.dirname(__file__), "models", "batch_norm1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28*28)
    model.stack(Dense(100, 'sigmoid'), BatchNormalization(),
                Dense(100, 'sigmoid'), BatchNormalization(),
                Dense(100, 'sigmoid'), BatchNormalization(),
                Dense(10, 'linear'),
                Softmax())

    trainer = SGDTrainer(model)

    batches = MiniBatches(MnistDataset(), batch_size=60)

    trainer.run(batches, controllers=[])

    model.save_params(default_model)
batch_set = MiniBatches(dataset)

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=26, input_tensor=3)
    model.stack(RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.1),
                RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.3),
                RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.6),
                RNN(hidden_size=30, input_type="sequence", output_type="one", vector_core=0.9),
                Dense(4),
                Softmax())

    trainer = SGDTrainer(model)

    annealer = LearningRateAnnealer()

    trainer.run(batch_set.train_set(), batch_set.valid_set(), controllers=[annealer])
valid_path = os.path.join(resource_dir, "ptb.valid.txt")

vocab = Vocab(char_based=True)
vocab.load(vocab_path, max_size=1000)

model = NeuralLM(input_dim=vocab.size, input_tensor=3)
model.stack(
    RNN(hidden_size=100, output_type="sequence"),
    RNN(hidden_size=100, output_type="sequence"),
    Dense(vocab.size, "softmax"),
)

if __name__ == "__main__":
    ap = ArgumentParser()
    ap.add_argument("--model", default=os.path.join(os.path.dirname(__file__), "models", "char_rnn_model1.gz"))
    ap.add_argument("--sample", default="")
    args = ap.parse_args()

    if os.path.exists(args.model):
        model.load_params(args.model)

    lmdata = LMDataset(vocab, train_path, valid_path, history_len=30, char_based=True, max_tokens=300)
    batch = SequentialMiniBatches(lmdata, batch_size=20)

    trainer = SGDTrainer(model)
    annealer = LearningRateAnnealer(trainer)

    trainer.run(batch, controllers=[annealer])

    model.save_params(args.model)
""" This experiment setting is described in http://arxiv.org/pdf/1502.03167v3.pdf. MNIST MLP baseline model. Gaussian initialization described in the paper did not convergence, I have no idea. """ import logging, os logging.basicConfig(level=logging.INFO) from deepy.dataset import MnistDataset, MiniBatches from deepy.networks import NeuralClassifier from deepy.layers import Dense, Softmax from deepy.trainers import SGDTrainer default_model = os.path.join(os.path.dirname(__file__), "models", "baseline1.gz") if __name__ == '__main__': model = NeuralClassifier(input_dim=28 * 28) model.stack(Dense(100, 'sigmoid'), Dense(100, 'sigmoid'), Dense(100, 'sigmoid'), Dense(10, 'linear'), Softmax()) trainer = SGDTrainer(model) batches = MiniBatches(MnistDataset(), batch_size=60) trainer.run(batches, epoch_controllers=[]) model.save_params(default_model)
""" An auto-encoder for compress MNIST images. """ import logging, os logging.basicConfig(level=logging.INFO) from deepy.dataset import MnistDataset, MiniBatches from deepy.networks import AutoEncoder from deepy.layers import Dense from deepy.trainers import SGDTrainer, LearningRateAnnealer from deepy.utils import shared_scalar model_path = os.path.join(os.path.dirname(__file__), "models", "mnist_autoencoder.gz") if __name__ == '__main__': model = AutoEncoder(input_dim=28 * 28, rep_dim=30) model.stack_encoders(Dense(50, 'tanh'), Dense(30)) model.stack_decoders(Dense(50, 'tanh'), Dense(28 * 28)) trainer = SGDTrainer(model, { 'learning_rate': graph.shared(0.05), 'gradient_clipping': 3 }) mnist = MiniBatches(MnistDataset(for_autoencoder=True), batch_size=20) trainer.run(mnist, epoch_controllers=[LearningRateAnnealer()]) model.save_params(model_path)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
This experiment setting is described in http://arxiv.org/pdf/1502.03167v3.pdf.

MNIST MLP baseline model.

The Gaussian initialization described in the paper did not converge; I am not sure why.
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax
from deepy.trainers import SGDTrainer

default_model = os.path.join(os.path.dirname(__file__), "models", "baseline1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(100, 'sigmoid'),
                Dense(100, 'sigmoid'),
                Dense(100, 'sigmoid'),
                Dense(10, 'linear'),
                Softmax())

    trainer = SGDTrainer(model)

    batches = MiniBatches(MnistDataset(), batch_size=60)

    trainer.run(batches, controllers=[])

    model.save_params(default_model)
# Shuffle the data
random.Random(3).shuffle(data)

# Separate data
valid_size = int(len(data) * 0.15)
train_set = data[valid_size:]
valid_set = data[:valid_size]

dataset = SequentialDataset(train_set, valid=valid_set)
dataset.pad_left(20)
dataset.report()

batch_set = MiniBatches(dataset)

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=26, input_tensor=3)
    model.stack(RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.1),
                RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.3),
                RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.6),
                RNN(hidden_size=30, input_type="sequence", output_type="one", vector_core=0.9),
                Dense(4),
                Softmax())

    trainer = SGDTrainer(model)

    annealer = LearningRateAnnealer(trainer)

    trainer.run(batch_set.train_set(), batch_set.valid_set(), controllers=[annealer])
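    # --- Hedged addition, not part of the original script ---
    # The other examples in this collection persist the trained network with
    # model.save_params(...); this mirrors that pattern here. The file name
    # "sequence_classifier1.gz" is an assumption, and `os` is assumed to be
    # imported in the part of the script not shown above.
    model_path = os.path.join(os.path.dirname(__file__), "models", "sequence_classifier1.gz")
    model.save_params(model_path)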