Example #1
def _train_impl(self, X, y):
    if self.spec.is_convolution:
        X = X.reshape(X.shape[:3])
    self.iterations = 0
    data = zip(X, y)
    self.dataset = SequentialDataset(data)
    minibatches = MiniBatches(self.dataset, batch_size=20)
    self.trainer.run(minibatches, controllers=self.controllers)
    return self
Example #2
def get_data():
    data = []
    for _ in range(DATA_SIZE):
        sequence = []
        for _ in range(SEQUENCE_LENGTH):
            sequence.append(random_vector())
        data.append([np.vstack(sequence)])
    valid_size = int(DATA_SIZE * 0.1)
    return MiniBatches(BasicDataset(data[valid_size:], valid=data[:valid_size]))
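Note: DATA_SIZE, SEQUENCE_LENGTH and random_vector() are not defined in this excerpt. A minimal sketch of plausible definitions so get_data() above can run; the constants and the vector width are assumptions, not values from the original code:

import numpy as np

# Hypothetical values and helper, not part of the original snippet.
DATA_SIZE = 1000        # number of sequences to generate
SEQUENCE_LENGTH = 10    # time steps per sequence
VECTOR_DIM = 8          # width of each random vector (arbitrary choice)

def random_vector():
    # One time step; float32 keeps the data compatible with Theano-based models.
    return np.random.rand(VECTOR_DIM).astype("float32")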
Example #3
def run(method, model_path):
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(128, 'relu'), Dense(128, 'relu'), Dense(10, 'linear'),
                Softmax())

    trainer = ScipyTrainer(model, method)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(MnistDataset(), batch_size=100)

    trainer.run(mnist, epoch_controllers=[annealer])

    model.save_params(model_path)
Example #4
def run(initializer, model_path):
    model = NeuralClassifier(input_dim=28 * 28)
    for _ in range(6):
        model.stack(Dense(128, 'relu', init=initializer))
    model.stack(Dense(10, 'linear'), Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer(trainer)

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer.run(mnist, controllers=[annealer])

    model.save_params(model_path)
Example #5
"""
An auto-encoder for compressing MNIST images.
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import AutoEncoder
from deepy.layers import Dense
from deepy.trainers import SGDTrainer, LearningRateAnnealer
from deepy.utils import shared_scalar

model_path = os.path.join(os.path.dirname(__file__), "models",
                          "mnist_autoencoder.gz")

if __name__ == '__main__':
    model = AutoEncoder(input_dim=28 * 28, rep_dim=30)
    model.stack_encoders(Dense(50, 'tanh'), Dense(30))
    model.stack_decoders(Dense(50, 'tanh'), Dense(28 * 28))

    trainer = SGDTrainer(model, {
        'learning_rate': shared_scalar(0.05),
        'gradient_clipping': 3
    })

    mnist = MiniBatches(MnistDataset(for_autoencoder=True), batch_size=20)

    trainer.run(mnist, controllers=[LearningRateAnnealer(trainer)])

    model.save_params(model_path)
Example #6
        model.stack(HighwayLayerLRDiagDropoutBatchNorm(
            activation=activation, gate_bias=gate_bias, projection_dim=d,
            d_p_0=dropout_p_h_0, d_p_1=dropout_p_h_1, init=init,
            quasi_ortho_init=True))
    #model.stack(BatchNormalization(),Dropout(p=dropout_p_2), Dense(10, init=init))
    model.stack(Dropout(p=dropout_p_2), Dense(10, init=init))

    
    learning_rate_start  = 3e-3
    #learning_rate_target = 3e-7
    #learning_rate_epochs = 100
    #learning_rate_decay  = (learning_rate_target / learning_rate_start) ** (1.0 / learning_rate_epochs)
    conf = TrainerConfig()
    conf.learning_rate = LearningRateAnnealer.learning_rate(learning_rate_start)
    #conf.gradient_clipping = 1
    conf.patience = 20
    #conf.gradient_tolerance = 5
    conf.avoid_nan = True
    conf.min_improvement = 1e-10

    #trainer = MomentumTrainer(model)
    trainer = AdamTrainer(model, conf)

    mnist = MiniBatches(MnistDataset(), batch_size=100)
    #mnist = MiniBatches(MnistDatasetSmallValid(), batch_size=100)

    #trainer.run(mnist, controllers=[IncrementalLearningRateAnnealer(trainer, 0, learning_rate_decay)])
    trainer.run(mnist, controllers=[LearningRateAnnealer(trainer, 3, 14)])
    logging.info('Setting best parameters for testing.')
    trainer.set_params(*trainer.best_params)
    trainer._run_test(-1, mnist.test_set())

    model.save_params(model_path)
Example #7
expanded_train_set = []

for img, label in mnist.train_set():
    expanded_train_set.append((img, label))
    original_img = (img * 256).reshape((28, 28))
    transformed_img = (elastic_distortion(original_img) / 256).flatten()
    expanded_train_set.append((transformed_img, label))

env.numpy_rand.shuffle(expanded_train_set)

expanded_mnist = BasicDataset(train=expanded_train_set, valid=mnist.valid_set(), test=mnist.test_set())

logging.info("expanded training data size: %d" % len(expanded_train_set))

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'relu'),
                Dense(256, 'relu'),
                Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(expanded_mnist, batch_size=20)

    trainer.run(mnist, epoch_controllers=[annealer])

    model.save_params(default_model)
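The elastic_distortion() used above is not defined in this excerpt. A common way to implement it is the smoothed random displacement field of Simard et al. (2003); a minimal sketch follows, where the alpha and sigma values are assumptions rather than the original settings:

import numpy as np
from scipy.ndimage import gaussian_filter, map_coordinates

def elastic_distortion(image, alpha=36.0, sigma=6.0):
    # Draw a random displacement field, smooth it with a Gaussian filter,
    # then resample the image along the displaced coordinates.
    # alpha scales the displacement, sigma controls its smoothness.
    dx = gaussian_filter(np.random.uniform(-1, 1, image.shape), sigma) * alpha
    dy = gaussian_filter(np.random.uniform(-1, 1, image.shape), sigma) * alpha
    xs, ys = np.meshgrid(np.arange(image.shape[1]), np.arange(image.shape[0]))
    coords = np.array([ys + dy, xs + dx])
    return map_coordinates(image, coords, order=1)

The loop above scales each image back to the 0-255 range, distorts it, then rescales to 0-1 and flattens it before adding it to the expanded training set.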
Example #8
    word_matrix = np.vstack(char_vectors)
    data.append((word_matrix, label))

# Shuffle the data
random.Random(3).shuffle(data)

# Separate data
valid_size = int(len(data) * 0.15)
train_set = data[valid_size:]
valid_set = data[:valid_size]

dataset = SequentialDataset(train_set, valid=valid_set)
dataset.pad_left(20)
dataset.report()

batch_set = MiniBatches(dataset)

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=26, input_tensor=3)
    model.stack(
        RNN(hidden_size=30,
            input_type="sequence",
            output_type="sequence",
            vector_core=0.1),
        RNN(hidden_size=30,
            input_type="sequence",
            output_type="sequence",
            vector_core=0.3),
        RNN(hidden_size=30,
            input_type="sequence",
            output_type="sequence",
Example #9
                                               quasi_ortho_init=True))
    #model.stack(BatchNormalization(),Dropout(p=dropout_p_2), Dense(10, init=init))
    model.stack(Dropout(p=dropout_p_2), Dense(10, init=init))

    learning_rate_start = 3e-3
    #learning_rate_target = 3e-7
    #learning_rate_epochs = 100
    #learning_rate_decay  = (learning_rate_target / learning_rate_start) ** (1.0 / learning_rate_epochs)
    conf = TrainerConfig()
    conf.learning_rate = LearningRateAnnealer.learning_rate(
        learning_rate_start)
    #conf.gradient_clipping = 1
    conf.patience = 20
    #conf.gradient_tolerance = 5
    conf.avoid_nan = True
    conf.min_improvement = 1e-10

    #trainer = MomentumTrainer(model)
    trainer = AdamTrainer(model, conf)

    mnist = MiniBatches(MnistDataset(), batch_size=100)
    #mnist = MiniBatches(MnistDatasetSmallValid(), batch_size=100)

    #trainer.run(mnist, controllers=[IncrementalLearningRateAnnealer(trainer, 0, learning_rate_decay)])
    trainer.run(mnist, controllers=[LearningRateAnnealer(trainer, 3, 14)])
    logging.info('Setting best parameters for testing.')
    trainer.set_params(*trainer.best_params)
    trainer._run_test(-1, mnist.test_set())

    model.save_params(model_path)
Example #10
"""
This experiment setting is described in http://arxiv.org/pdf/1502.03167v3.pdf.
MNIST MLP baseline model.
Gaussian initialization as described in the paper did not converge; I have no idea why.
"""

import logging, os
logging.basicConfig(level=logging.INFO)

from deepy.dataset import MnistDataset, MiniBatches
from deepy.networks import NeuralClassifier
from deepy.layers import Dense, Softmax
from deepy.trainers import SGDTrainer

default_model = os.path.join(os.path.dirname(__file__), "models", "baseline1.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(100, 'sigmoid'),
                Dense(100, 'sigmoid'),
                Dense(100, 'sigmoid'),
                Dense(10, 'linear'),
                Softmax())

    trainer = SGDTrainer(model)

    batches = MiniBatches(MnistDataset(), batch_size=60)

    trainer.run(batches, epoch_controllers=[])

    model.save_params(default_model)
Example #11
    train_monitors["cost"] = model.output
    test_monitors["cost"] = model.test_output

    train_iteration = theano.function(inputs=model.input_variables,
                                      outputs=train_monitors.values(),
                                      updates=gradient_updates,
                                      allow_input_downcast=True)

    valid_iteration = theano.function(inputs=model.input_variables,
                                      outputs=test_monitors.values(),
                                      allow_input_downcast=True)

    max_epochs = 10

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    for i in range(max_epochs):
        # Training
        cost_matrix = []
        for inputs in mnist.train_set():
            costs = train_iteration(*inputs)
            cost_matrix.append(costs)
        train_costs = list(zip(train_monitors.keys(), np.mean(cost_matrix, axis=0)))
        print "train", i, train_costs

        # Test with valid data
        cost_matrix = []
        for inputs in mnist.valid_set():
            costs = valid_iteration(*inputs)
            cost_matrix.append(costs)
    train_monitors["cost"] = model.output
    test_monitors["cost"] = model.test_output

    train_iteration = theano.function(inputs=model.input_variables,
                                      outputs=train_monitors.values(),
                                      updates=gradient_updates,
                                      allow_input_downcast=True)

    valid_iteration = theano.function(inputs=model.input_variables,
                                      outputs=test_monitors.values(),
                                      allow_input_downcast=True)

    max_epochs = 10

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    for i in range(max_epochs):
        # Training
        cost_matrix = []
        for inputs in mnist.train_set():
            costs = train_iteration(*inputs)
            cost_matrix.append(costs)
        train_costs = list(
            zip(train_monitors.keys(), np.mean(cost_matrix, axis=0)))
        print "train", i, train_costs

        # Test with valid data
        cost_matrix = []
        for inputs in mnist.valid_set():
            costs = valid_iteration(*inputs)
Example #13
    word_matrix = np.vstack(char_vectors)
    data.append((word_matrix, label))

# Shuffle the data
random.Random(3).shuffle(data)

# Separate data
valid_size = int(len(data) * 0.15)
train_set = data[valid_size:]
valid_set = data[:valid_size]

dataset = SequentialDataset(train_set, valid=valid_set)
dataset.pad_left(20)
dataset.report()

batch_set = MiniBatches(dataset)

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=26, input_tensor=3)
    model.stack(RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.1),
                RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.3),
                RNN(hidden_size=30, input_type="sequence", output_type="sequence", vector_core=0.6),
                RNN(hidden_size=30, input_type="sequence", output_type="one", vector_core=0.9),
                Dense(4),
                Softmax())

    trainer = SGDTrainer(model)

    annealer = LearningRateAnnealer(trainer)

    trainer.run(batch_set.train_set(), batch_set.valid_set(), controllers=[annealer])
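Examples #8 and #13 pick up after char_vectors has already been built for each word. Given input_dim=26, each word is presumably encoded as one 26-dimensional one-hot vector per character; a minimal sketch of such an encoder, where the helper name and the lower-case ASCII assumption are not from the original code:

import numpy as np

def word_to_char_vectors(word):
    # One 26-dimensional one-hot row per character, assuming lower-case
    # ASCII letters only (matching input_dim=26 in the models above).
    vectors = []
    for ch in word.lower():
        vec = np.zeros(26, dtype="float32")
        if 'a' <= ch <= 'z':
            vec[ord(ch) - ord('a')] = 1.0
        vectors.append(vec)
    return vectors

# Usage mirroring the top of the snippet:
# char_vectors = word_to_char_vectors("cat")
# word_matrix = np.vstack(char_vectors)   # shape (len(word), 26)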
Example #14
    from argparse import ArgumentParser
    ap = ArgumentParser()
    ap.add_argument("--load", default="", help="pre-trained model path")
    ap.add_argument("--finetune", action="store_true")
    args = ap.parse_args()

    model = DrawModel(image_width=28, image_height=28, attention_times=64)

    if args.load:
        model.load_params(args.load)

    conf = {
        "gradient_clipping": 10,
        "learning_rate": LearningRateAnnealer.learning_rate(0.004),
        "weight_l2": 0
    }
    # conf.avoid_nan = True
    # from deepy import DETECT_NAN_MODE
    # conf.theano_mode = DETECT_NAN_MODE
    # TODO: Find out the problem causing NaN
    if args.finetune:
        trainer = FineTuningAdaGradTrainer(model, conf)
    else:
        trainer = AdamTrainer(model, conf)

    mnist = MiniBatches(BinarizedMnistDataset(), batch_size=100)

    trainer.run(mnist, controllers=[])

    model.save_params(model_path)
Example #15
default_model = os.path.join(os.path.dirname(__file__), "models",
                             "deep_conv.gz")

if __name__ == '__main__':
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(  # Reshape to 3D tensor
        Reshape((-1, 28, 28)),
        # Add a new dimension for convolution
        DimShuffle((0, 'x', 1, 2)),
        Convolution((4, 1, 5, 5), activation="relu"),
        Dropout(0.15),
        Convolution((8, 4, 5, 5), activation="relu"),
        Dropout(0.1),
        Convolution((16, 8, 3, 3), activation="relu"),
        Flatten(),
        Dropout(0.1),
        # As dimension information was lost, reveal it to the pipeline
        RevealDimension(16),
        Dense(10, 'linear'),
        Softmax())

    trainer = MomentumTrainer(model)

    annealer = LearningRateAnnealer()

    mnist = MiniBatches(MnistDataset(), batch_size=20)

    trainer.run(mnist, controllers=[annealer])

    model.save_params(default_model)
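In the Convolution layers above, the filter shape reads as (output channels, input channels, filter height, filter width). Assuming 'valid' convolutions with no pooling (an assumption about the defaults, not stated in the snippet), the spatial size shrinks from 28x28 to 18x18, and the 16 passed to RevealDimension matches the channel count of the last convolution. A quick check of that arithmetic:

# out_size = in_size - filter_size + 1 for a 'valid' convolution
size = 28
for filter_size in (5, 5, 3):
    size = size - filter_size + 1   # 28 -> 24 -> 20 -> 18
flat_features = 16 * size * size    # 16 channels * 18 * 18 = 5184 after Flatten()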
Example #16
from deepy.utils import Timer

logging.basicConfig(level=logging.INFO)

if __name__ == '__main__':
    ap = ArgumentParser()
    ap.add_argument("--model", default="/tmp/mnist_att_params2.gz")
    ap.add_argument("--method", default="momentum")
    ap.add_argument("--learning_rate", default=0.01)
    ap.add_argument("--variance", default=0.005)
    ap.add_argument("--disable_backprop", default=False)
    ap.add_argument("--disable_reinforce", default=False)
    ap.add_argument("--random_glimpse", default=False)
    args = ap.parse_args()

    mnist = MiniBatches((MnistDataset()), batch_size=1)

    model_path = args.model

    network = get_network(model_path, std=args.variance,
                          disable_reinforce=args.disable_reinforce, random_glimpse=args.random_glimpse)

    trainer_conf = TrainerConfig()
    trainer_conf.learning_rate = args.learning_rate
    trainer_conf.weight_l2 = 0.0001
    trainer_conf.hidden_l2 = 0.0001
    trainer_conf.method = args.method
    trainer_conf.disable_reinforce = args.disable_reinforce
    trainer_conf.disable_backprop = args.disable_backprop

    trainer = AttentionTrainer(network, network.layers[0], config=trainer_conf)
Example #17
        if b == 1:
            sum += a
        sequence.append(np.array([a, b], dtype=FLOATX))
    sequence = np.vstack(sequence)
    sum = np.array([sum], dtype=FLOATX)
    data.append((sequence, sum))

# Separate data
valid_size = 1000
train_set = data[valid_size:]
valid_set = data[:valid_size]

dataset = SequentialDataset(train_set, valid=valid_set)
dataset.report()

batch_set = MiniBatches(dataset, batch_size=32)

if __name__ == '__main__':

    ap = ArgumentParser()
    ap.add_argument("--model", default=os.path.join(os.path.dirname(__file__), "models", "sequence_adding_100_2.gz"))
    args = ap.parse_args()

    model = NeuralRegressor(input_dim=2, input_tensor=3)
    model.stack(IRNN(hidden_size=100, input_type="sequence",
                     output_type="one"),
                Dense(1))

    if os.path.exists(args.model):
        model.load_params(args.model)
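Example #17 shows only the tail of the data-generation loop for the sequence-adding task. A minimal sketch of a full generator consistent with that tail; the sequence length, dataset size, and two-marker convention are assumptions suggested by the model filename, not values from the original code:

import numpy as np

FLOATX = "float32"          # matches deepy's FLOATX constant
SEQUENCE_LENGTH = 100       # assumption (filename suggests length 100)
DATA_SIZE = 20000           # assumption

data = []
for _ in range(DATA_SIZE):
    # Each time step is a pair (a, b): a random value and a 0/1 marker.
    # The regression target is the sum of the values where the marker is 1.
    # (The name `sum` shadows the builtin; kept to match the excerpt.)
    marked = np.random.choice(SEQUENCE_LENGTH, size=2, replace=False)
    sequence = []
    sum = 0.0
    for t in range(SEQUENCE_LENGTH):
        a = np.random.uniform(0, 1)
        b = 1 if t in marked else 0
        if b == 1:
            sum += a
        sequence.append(np.array([a, b], dtype=FLOATX))
    sequence = np.vstack(sequence)
    sum = np.array([sum], dtype=FLOATX)
    data.append((sequence, sum))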