def _train_impl(self, X, y):
    if self.spec.is_convolution:
        X = X.reshape(X.shape[:3])
    self.iterations = 0
    data = zip(X, y)
    self.dataset = SequentialDataset(data)
    minibatches = MiniBatches(self.dataset, batch_size=20)
    self.trainer.run(minibatches, controllers=self.controllers)
    return self
def get_data():
    data = []
    for _ in range(DATA_SIZE):
        sequence = []
        for _ in range(SEQUENCE_LENGTH):
            sequence.append(random_vector())
        data.append([np.vstack(sequence)])
    valid_size = int(DATA_SIZE * 0.1)
    return MiniBatches(BasicDataset(data[valid_size:], valid=data[:valid_size]))
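# --- Hedged sketch: DATA_SIZE, SEQUENCE_LENGTH, and random_vector are defined
# elsewhere in the original file. The stand-ins below only make the snippet
# self-contained; the values and the vector shape are assumptions.
import numpy as np

DATA_SIZE = 1000        # assumed
SEQUENCE_LENGTH = 10    # assumed
VECTOR_DIM = 30         # assumed

def random_vector():
    # One random row vector per timestep, matching the np.vstack call above.
    return np.random.rand(1, VECTOR_DIM).astype("float32")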
def run(method, model_path):
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(128, 'relu'),
                Dense(128, 'relu'),
                Dense(10, 'linear'),
                Softmax())

    trainer = ScipyTrainer(model, method)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(MnistDataset(), batch_size=100)

    trainer.run(mnist, epoch_controllers=[annealer])

    model.save_params(model_path)
def run(initializer, model_path):
    model = NeuralClassifier(input_dim=28 * 28)
    for _ in range(6):
        model.stack(Dense(128, 'relu', init=initializer))
    model.stack(Dense(10, 'linear'), Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer(trainer)

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer.run(mnist, controllers=[annealer])

    model.save_params(model_path)
""" An auto-encoder for compress MNIST images. """ import logging, os logging.basicConfig(level=logging.INFO) from deepy.dataset import MnistDataset, MiniBatches from deepy.networks import AutoEncoder from deepy.layers import Dense from deepy.trainers import SGDTrainer, LearningRateAnnealer from deepy.utils import shared_scalar model_path = os.path.join(os.path.dirname(__file__), "models", "mnist_autoencoder.gz") if __name__ == '__main__': model = AutoEncoder(input_dim=28 * 28, rep_dim=30) model.stack_encoders(Dense(50, 'tanh'), Dense(30)) model.stack_decoders(Dense(50, 'tanh'), Dense(28 * 28)) trainer = SGDTrainer(model, { 'learning_rate': shared_scalar(0.05), 'gradient_clipping': 3 }) mnist = MiniBatches(MnistDataset(for_autoencoder=True), batch_size=20) trainer.run(mnist, controllers=[LearningRateAnnealer(trainer)]) model.save_params(model_path)
model.stack(HighwayLayerLRDiagDropoutBatchNorm(activation=activation,
                                               gate_bias=gate_bias,
                                               projection_dim=d,
                                               d_p_0=dropout_p_h_0,
                                               d_p_1=dropout_p_h_1,
                                               init=init,
                                               quasi_ortho_init=True))
# model.stack(BatchNormalization(), Dropout(p=dropout_p_2), Dense(10, init=init))
model.stack(Dropout(p=dropout_p_2), Dense(10, init=init))

learning_rate_start = 3e-3
# learning_rate_target = 3e-7
# learning_rate_epochs = 100
# learning_rate_decay = (learning_rate_target / learning_rate_start) ** (1.0 / learning_rate_epochs)

conf = TrainerConfig()
conf.learning_rate = LearningRateAnnealer.learning_rate(learning_rate_start)
# conf.gradient_clipping = 1
conf.patience = 20
# conf.gradient_tolerance = 5
conf.avoid_nan = True
conf.min_improvement = 1e-10

# trainer = MomentumTrainer(model)
trainer = AdamTrainer(model, conf)

mnist = MiniBatches(MnistDataset(), batch_size=100)
# mnist = MiniBatches(MnistDatasetSmallValid(), batch_size=100)

# trainer.run(mnist, controllers=[IncrementalLearningRateAnnealer(trainer, 0, learning_rate_decay)])
trainer.run(mnist, controllers=[LearningRateAnnealer(trainer, 3, 14)])

logging.info('Setting best parameters for testing.')
trainer.set_params(*trainer.best_params)
trainer._run_test(-1, mnist.test_set())

model.save_params(model_path)
expanded_train_set = []
for img, label in mnist.train_set():
    expanded_train_set.append((img, label))
    original_img = (img * 256).reshape((28, 28))
    transformed_img = (elastic_distortion(original_img) / 256).flatten()
    expanded_train_set.append((transformed_img, label))
env.numpy_rand.shuffle(expanded_train_set)

expanded_mnist = BasicDataset(train=expanded_train_set,
                              valid=mnist.valid_set(),
                              test=mnist.test_set())

logging.info("expanded training data size: %d" % len(expanded_train_set))

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'relu'),
                Dense(256, 'relu'),
                Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(expanded_mnist, batch_size=20)

    trainer.run(mnist, epoch_controllers=[annealer])

    model.save_params(default_model)
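# --- Hedged sketch: `elastic_distortion` is imported from elsewhere in the
# repository. A minimal Simard-style elastic distortion, with illustrative
# (not the repository's) alpha/sigma values, could look like this:
import numpy as np
from scipy.ndimage import gaussian_filter, map_coordinates

def elastic_distortion_sketch(image, alpha=36.0, sigma=6.0, rng=np.random):
    # Smooth a random displacement field with a Gaussian, scale it by alpha,
    # then resample the image at the displaced coordinates.
    dx = gaussian_filter(rng.uniform(-1, 1, image.shape), sigma) * alpha
    dy = gaussian_filter(rng.uniform(-1, 1, image.shape), sigma) * alpha
    ys, xs = np.meshgrid(np.arange(image.shape[0]),
                         np.arange(image.shape[1]), indexing='ij')
    return map_coordinates(image, [ys + dy, xs + dx], order=1, mode='reflect')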
""" This experiment setting is described in http://arxiv.org/pdf/1502.03167v3.pdf. MNIST MLP baseline model. Gaussian initialization described in the paper did not convergence, I have no idea. """ import logging, os logging.basicConfig(level=logging.INFO) from deepy.dataset import MnistDataset, MiniBatches from deepy.networks import NeuralClassifier from deepy.layers import Dense, Softmax from deepy.trainers import SGDTrainer default_model = os.path.join(os.path.dirname(__file__), "models", "baseline1.gz") if __name__ == '__main__': model = NeuralClassifier(input_dim=28 * 28) model.stack(Dense(100, 'sigmoid'), Dense(100, 'sigmoid'), Dense(100, 'sigmoid'), Dense(10, 'linear'), Softmax()) trainer = SGDTrainer(model) batches = MiniBatches(MnistDataset(), batch_size=60) trainer.run(batches, epoch_controllers=[]) model.save_params(default_model)
train_monitors["cost"] = model.output test_monitors["cost"] = model.test_output train_iteration = theano.function(inputs=model.input_variables, outputs=train_monitors.values(), updates=gradient_updates, allow_input_downcast=True) valid_iteration = theano.function(inputs=model.input_variables, outputs=test_monitors.values(), allow_input_downcast=True) max_epochs = 10 mnist = MiniBatches(MnistDataset(), batch_size=20) for i in range(max_epochs): # Training cost_matrix = [] for inputs in mnist.train_set(): costs = train_iteration(*inputs) cost_matrix.append(costs) train_costs = list(zip(train_monitors.keys(), np.mean(cost_matrix, axis=0))) print "train", i, train_costs # Test with valid data cost_matrix = [] for inputs in mnist.valid_set(): costs = valid_iteration(*inputs) cost_matrix.append(costs)
train_monitors["cost"] = model.output test_monitors["cost"] = model.test_output train_iteration = theano.function(inputs=model.input_variables, outputs=train_monitors.values(), updates=gradient_updates, allow_input_downcast=True) valid_iteration = theano.function(inputs=model.input_variables, outputs=test_monitors.values(), allow_input_downcast=True) max_epochs = 10 mnist = MiniBatches(MnistDataset(), batch_size=20) for i in range(max_epochs): # Training cost_matrix = [] for inputs in mnist.train_set(): costs = train_iteration(*inputs) cost_matrix.append(costs) train_costs = list( zip(train_monitors.keys(), np.mean(cost_matrix, axis=0))) print "train", i, train_costs # Test with valid data cost_matrix = [] for inputs in mnist.valid_set(): costs = valid_iteration(*inputs)
word_matrix = np.vstack(char_vectors)
data.append((word_matrix, label))

# Shuffle the data
random.Random(3).shuffle(data)

# Separate data
valid_size = int(len(data) * 0.15)
train_set = data[valid_size:]
valid_set = data[:valid_size]

dataset = SequentialDataset(train_set, valid=valid_set)
dataset.pad_left(20)
dataset.report()

batch_set = MiniBatches(dataset)

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=26, input_tensor=3)
    model.stack(RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.1),
                RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.3),
                RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.6),
                RNN(hidden_size=30, input_type="sequence", output_type="one", vector_core=0.9),
                Dense(4),
                Softmax())

    trainer = SGDTrainer(model)

    annealer = LearningRateAnnealer(trainer)

    trainer.run(batch_set.train_set(), batch_set.valid_set(), controllers=[annealer])
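# --- Hedged sketch: the excerpt begins after `char_vectors` is built. A
# plausible construction, consistent with input_dim=26 (one one-hot row per
# lowercase letter), is shown below; the helper name and details are assumptions.
import numpy as np

def word_to_matrix(word):
    char_vectors = []
    for ch in word.lower():
        vec = np.zeros(26, dtype="float32")
        vec[ord(ch) - ord('a')] = 1.0   # one-hot position of the letter
        char_vectors.append(vec)
    return np.vstack(char_vectors)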
from argparse import ArgumentParser

ap = ArgumentParser()
ap.add_argument("--load", default="", help="pre-trained model path")
ap.add_argument("--finetune", action="store_true")
args = ap.parse_args()

model = DrawModel(image_width=28, image_height=28, attention_times=64)

if args.load:
    model.load_params(args.load)

conf = {"gradient_clipping": 10,
        "learning_rate": LearningRateAnnealer.learning_rate(0.004),
        "weight_l2": 0}
# conf.avoid_nan = True
# from deepy import DETECT_NAN_MODE
# conf.theano_mode = DETECT_NAN_MODE
# TODO: Find out the problem causing NaN

if args.finetune:
    trainer = FineTuningAdaGradTrainer(model, conf)
else:
    trainer = AdamTrainer(model, conf)

mnist = MiniBatches(BinarizedMnistDataset(), batch_size=100)

trainer.run(mnist, controllers=[])

model.save_params(model_path)
default_model = os.path.join(os.path.dirname(__file__), "models", "deep_conv.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(
        # Reshape to 3D tensor
        Reshape((-1, 28, 28)),
        # Add a new dimension for convolution
        DimShuffle((0, 'x', 1, 2)),
        Convolution((4, 1, 5, 5), activation="relu"),
        Dropout(0.15),
        Convolution((8, 4, 5, 5), activation="relu"),
        Dropout(0.1),
        Convolution((16, 8, 3, 3), activation="relu"),
        Flatten(),
        Dropout(0.1),
        # As dimension information was lost, reveal it to the pipeline
        RevealDimension(16),
        Dense(10, 'linear'),
        Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer.run(mnist, controllers=[annealer])

    model.save_params(default_model)
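# --- Shape trace (assumptions: Theano's (num_filters, in_channels, rows, cols)
# filter convention, 'valid' borders, and no pooling between convolutions,
# which is what the 1 -> 4 -> 8 -> 16 channel chaining above suggests):
#   input:           (N, 784)
#   Reshape:         (N, 28, 28)
#   DimShuffle:      (N, 1, 28, 28)
#   Convolution 5x5: (N, 4, 24, 24)
#   Convolution 5x5: (N, 8, 20, 20)
#   Convolution 3x3: (N, 16, 18, 18)
#   Flatten:         (N, 16 * 18 * 18)
#   Dense + Softmax: (N, 10)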
from deepy.utils import Timer

logging.basicConfig(level=logging.INFO)

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default="/tmp/mnist_att_params2.gz")
    ap.add_argument("--method", default="momentum")
    ap.add_argument("--learning_rate", type=float, default=0.01)
    ap.add_argument("--variance", type=float, default=0.005)
    ap.add_argument("--disable_backprop", action="store_true")
    ap.add_argument("--disable_reinforce", action="store_true")
    ap.add_argument("--random_glimpse", action="store_true")
    args = ap.parse_args()

    mnist = MiniBatches(MnistDataset(), batch_size=1)

    model_path = args.model
    network = get_network(model_path,
                          std=args.variance,
                          disable_reinforce=args.disable_reinforce,
                          random_glimpse=args.random_glimpse)

    trainer_conf = TrainerConfig()
    trainer_conf.learning_rate = args.learning_rate
    trainer_conf.weight_l2 = 0.0001
    trainer_conf.hidden_l2 = 0.0001
    trainer_conf.method = args.method
    trainer_conf.disable_reinforce = args.disable_reinforce
    trainer_conf.disable_backprop = args.disable_backprop

    trainer = AttentionTrainer(network, network.layers[0], config=trainer_conf)
        if b == 1:
            sum += a
        sequence.append(np.array([a, b], dtype=FLOATX))
    sequence = np.vstack(sequence)
    sum = np.array([sum], dtype=FLOATX)
    data.append((sequence, sum))

# Separate data
valid_size = 1000
train_set = data[valid_size:]
valid_set = data[:valid_size]

dataset = SequentialDataset(train_set, valid=valid_set)
dataset.report()

batch_set = MiniBatches(dataset, batch_size=32)

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default=os.path.join(os.path.dirname(__file__), "models",
                                                    "sequence_adding_100_2.gz"))
    args = ap.parse_args()

    model = NeuralRegressor(input_dim=2, input_tensor=3)
    model.stack(IRNN(hidden_size=100, input_type="sequence", output_type="one"),
                Dense(1))

    if os.path.exists(args.model):
        model.load_params(args.model)
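# --- Hedged sketch: the excerpt above opens mid-way through the data-generation
# loop. A self-contained generator for one adding-problem sample is sketched
# below; the sequence length and two-marker scheme are assumptions (suggested,
# but not confirmed, by the "sequence_adding_100_2" model name).
import numpy as np

FLOATX = "float32"

def make_adding_sample(length=100, rng=np.random):
    # Values a in [0, 1), markers b in {0, 1}; the regression target is the
    # sum of the values at the marked positions.
    values = rng.uniform(0, 1, length)
    markers = np.zeros(length)
    markers[rng.choice(length, 2, replace=False)] = 1
    sequence = np.vstack([np.array([a, b], dtype=FLOATX)
                          for a, b in zip(values, markers)])
    target = np.array([values[markers == 1].sum()], dtype=FLOATX)
    return sequence, target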