def _build_trainer(nb_epochs):
        print("Will train Convoluational Deep NADE for a total of {0} epochs.".
              format(nb_epochs))

        with Timer("Building model"):
            builder = DeepConvNADEBuilder(image_shape=image_shape,
                                          nb_channels=nb_channels,
                                          use_mask_as_input=use_mask_as_input)

            convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
            fullnet_blueprint = "5 -> 16"
            print("Convnet:", convnet_blueprint)
            print("Fullnet:", fullnet_blueprint)
            builder.build_convnet_from_blueprint(convnet_blueprint)
            builder.build_fullnet_from_blueprint(fullnet_blueprint)

            model = builder.build()
            model.initialize(initer.UniformInitializer(random_seed=1234))

        with Timer("Building optimizer"):
            loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
                model, trainset)

            optimizer = SGD(loss=loss)
            optimizer.append_direction_modifier(ConstantLearningRate(0.001))

        with Timer("Building trainer"):
            batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(
                trainset, batch_size)

            trainer = Trainer(optimizer, batch_scheduler)

            # Print time for one epoch
            trainer.append_task(tasks.PrintEpochDuration())
            trainer.append_task(tasks.PrintTrainingDuration())

            # Log training error
            loss_monitor = views.MonitorVariable(loss.loss)
            avg_loss = tasks.AveragePerEpoch(loss_monitor)
            accum = tasks.Accumulator(loss_monitor)
            logger = tasks.Logger(loss_monitor, avg_loss)
            trainer.append_task(logger, avg_loss, accum)

            # Print average training loss.
            trainer.append_task(
                tasks.Print("Avg. training loss:     : {}", avg_loss))

            # Print NLL mean/stderror.
            nll = views.LossView(
                loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(
                    model, validset),
                batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(
                    validset, batch_size=len(validset), keep_mask=True))
            trainer.append_task(
                tasks.Print("Validset - NLL          : {0:.2f} ± {1:.2f}",
                            nll.mean, nll.stderror))

            trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))

            return trainer, nll, logger
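
For orientation, a minimal usage sketch of the helper above. It assumes the enclosing script defines trainset, validset, image_shape, nb_channels, use_mask_as_input and batch_size, and it assumes the returned Trainer is run through a train() method; that method name is an assumption, not something shown in this snippet.

# Hypothetical usage of _build_trainer (Trainer.train() is assumed, not shown above).
trainer, nll, logger = _build_trainer(nb_epochs=10)
trainer.train()  # runs until the MaxEpochStopping(10) criterion fires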
Example #2
    def test_fprop_faster(self):
        activation = "tanh"
        seed = 1234
        repeat = 1000

        layer = LayerLSTM(input_size=DATA['features_size'],
                          hidden_size=DATA['hidden_size'],
                          activation=activation)

        layer.initialize(initer.UniformInitializer(seed))

        layer_fast = LayerLSTMFast(input_size=DATA['features_size'],
                                   hidden_size=DATA['hidden_size'],
                                   activation=activation)

        # Wi, Wo, Wf, Wm
        layer_fast.W.set_value(
            np.concatenate([
                layer.Wi.get_value(),
                layer.Wo.get_value(),
                layer.Wf.get_value(),
                layer.Wm.get_value()
            ],
                           axis=1))
        layer_fast.U.set_value(
            np.concatenate([
                layer.Ui.get_value(),
                layer.Uo.get_value(),
                layer.Uf.get_value(),
                layer.Um.get_value()
            ],
                           axis=1))

        input = T.matrix('input')
        input.tag.test_value = DATA['batch_one_step']
        last_h = sharedX(DATA['state_h'])
        last_m = sharedX(DATA['state_m'])

        fprop = theano.function([input], layer.fprop(input, last_h, last_m))
        fprop_faster = theano.function([input],
                                       layer_fast.fprop(input, last_h, last_m))

        fprop_time = measure("h, m = fprop(DATA['batch_one_step'])", repeat)
        fprop_faster_time = measure(
            "h, m = fprop_faster(DATA['batch_one_step'])", repeat)

        print("fprop time: {:.2f} sec.", fprop_time)
        print("fprop faster time: {:.2f} sec.", fprop_faster_time)
        print("Speedup: {:.2f}x".format(fprop_time / fprop_faster_time))

        for i in range(DATA['seq_len']):
            h1, m1 = fprop(DATA['batch'][:, i, :])
            h2, m2 = fprop_faster(DATA['batch'][:, i, :])
            assert_array_equal(h1, h2)
            assert_array_equal(m1, m2)
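
Why concatenating Wi, Wo, Wf and Wm along axis=1 is safe: multiplying the input by the fused matrix and slicing the columns of the result gives exactly the four separate per-gate products. A self-contained NumPy sketch with made-up shapes (illustrative only, not taken from the library):

import numpy as np

rng = np.random.RandomState(1234)
x = rng.randn(8, 20)                                      # batch of 8, 20 features
Wi, Wo, Wf, Wm = (rng.randn(20, 30) for _ in range(4))    # per-gate weight matrices

W = np.concatenate([Wi, Wo, Wf, Wm], axis=1)              # fused (20, 120) matrix
fused = x.dot(W)                                          # one large product

# Column slices of the fused product match the per-gate products.
assert np.allclose(fused[:, 0:30], x.dot(Wi))
assert np.allclose(fused[:, 30:60], x.dot(Wo))
assert np.allclose(fused[:, 60:90], x.dot(Wf))
assert np.allclose(fused[:, 90:120], x.dot(Wm))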
Example #3
def weigths_initializer_factory(name, seed=1234):
    if name == "uniform":
        return initer.UniformInitializer(seed)
    elif name == "zeros":
        return initer.ZerosInitializer(seed)
    elif name == "diagonal":
        return initer.DiagonalInitializer(seed)
    elif name == "orthogonal":
        return OrthogonalInitializer(seed)
    elif name == "gaussian":
        return initer.GaussienInitializer(seed)

    raise NotImplementedError("Unknown: " + str(name))
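
A short usage sketch of the factory above; the loop is illustrative and only exercises names handled by the function:

# Hypothetical usage: build initializers by name for reproducible runs.
for name in ["uniform", "zeros", "orthogonal"]:
    init = weigths_initializer_factory(name, seed=42)
    print(type(init).__name__)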
Example #4
    def test_fprop_mask_vs_not_mask(self):
        activation = "tanh"
        seed = 1234
        repeat = 100

        lstm = LSTM(
            input_size=DATA['features_size'],
            hidden_sizes=[DATA['hidden_size']],
        )

        lstm.initialize(initer.UniformInitializer(seed))

        lstm2 = LSTMFast(
            input_size=DATA['features_size'],
            hidden_sizes=[DATA['hidden_size']],
        )
        lstm2.mask = sharedX(DATA['mask'])
        # Wi, Wo, Wf, Wm
        # Make sure the weights are the same.
        lstm2.layers_lstm[0].W.set_value(
            np.concatenate([
                lstm.layers_lstm[0].Wi.get_value(),
                lstm.layers_lstm[0].Wo.get_value(),
                lstm.layers_lstm[0].Wf.get_value(),
                lstm.layers_lstm[0].Wm.get_value()
            ],
                           axis=1))
        lstm2.layers_lstm[0].U.set_value(
            np.concatenate([
                lstm.layers_lstm[0].Ui.get_value(),
                lstm.layers_lstm[0].Uo.get_value(),
                lstm.layers_lstm[0].Uf.get_value(),
                lstm.layers_lstm[0].Um.get_value()
            ],
                           axis=1))

        input = T.tensor3('input')
        input.tag.test_value = DATA['batch']

        fprop = theano.function([input], lstm.get_output(input))
        fprop2 = theano.function([input], lstm2.get_output(input))
        # fprop_time = measure("out = fprop(DATA['batch'])", repeat)
        # print("fprop time: {:.2f} sec.", fprop_time)
        out = fprop(DATA['batch'])
        out2 = fprop2(DATA['batch'])

        assert_true(out.sum() != out2.sum())
        assert_array_equal((out * DATA['mask'][:, :, None]),
                           (out2 * DATA['mask'][:, :, None]))
Example #5
    def test_fprop_faster(self):
        seed = 1234
        repeat = 100

        lstm = LSTM(
            input_size=DATA['features_size'],
            hidden_sizes=[DATA['hidden_size']],
        )

        lstm.initialize(initer.UniformInitializer(seed))

        lstm2 = LSTMFaster(
            input_size=DATA['features_size'],
            hidden_sizes=[DATA['hidden_size']],
        )
        # Wi, Wo, Wf, Wm
        # Make sure the weights are the same.
        lstm2.layers_lstm[0].W.set_value(
            np.concatenate([
                lstm.layers_lstm[0].Wi.get_value(),
                lstm.layers_lstm[0].Wo.get_value(),
                lstm.layers_lstm[0].Wf.get_value(),
                lstm.layers_lstm[0].Wm.get_value()
            ],
                           axis=1))
        lstm2.layers_lstm[0].U.set_value(
            np.concatenate([
                lstm.layers_lstm[0].Ui.get_value(),
                lstm.layers_lstm[0].Uo.get_value(),
                lstm.layers_lstm[0].Uf.get_value(),
                lstm.layers_lstm[0].Um.get_value()
            ],
                           axis=1))

        input = T.tensor3('input')
        input.tag.test_value = DATA['batch']

        fprop = theano.function([input], lstm.get_output(input))
        fprop2 = theano.function([input], lstm2.get_output(input))
        fprop_time = measure("out = fprop(DATA['batch'])", repeat)
        print("fprop time: {:.2f} sec.", fprop_time)
        fprop2_time = measure("out = fprop2(DATA['batch'])", repeat)
        print("fprop faster time: {:.2f} sec.", fprop2_time)
        print("Speedup: {:.2f}x".format(fprop_time / fprop2_time))

        out = fprop(DATA['batch'])
        out2 = fprop2(DATA['batch'])
        assert_array_equal(out, out2)
    def _build_trainer(nb_epochs, optimizer_cls):
        print(
            "Will build a trainer is going to train a Perceptron for {0} epochs."
            .format(nb_epochs))

        print("Building model")
        model = Perceptron(trainset.input_size, nb_classes)
        model.initialize(initer.UniformInitializer(random_seed=1234))

        print("Building optimizer")
        loss = NLL(model, trainset)
        optimizer = optimizer_cls(loss=loss)
        print("Optimizer: {}".format(type(optimizer).__name__))
        #optimizer = SGD(loss=loss)
        #optimizer.append_direction_modifier(ConstantLearningRate(0.1))

        # Use mini batches of 100 examples.
        batch_scheduler = MiniBatchScheduler(trainset, 100)

        print("Building trainer")
        trainer = Trainer(optimizer, batch_scheduler)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)

        # Print NLL mean/stderror.
        nll = views.LossView(loss=NLL(model, validset),
                             batch_scheduler=FullBatchScheduler(validset))
        logger = tasks.Logger(loss_monitor, avg_loss, nll.mean)
        trainer.append_task(logger, avg_loss)

        # Train for `nb_epochs` epochs (stopping criteria should be added at the end).
        trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))

        return trainer, nll, logger
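
A hypothetical sketch of the optimizer_cls hook above; SGD is the optimizer class used in the first example, and any other optimizer class would have to be imported separately:

# Hypothetical usage: parameterize the builder with different optimizer classes.
trainer_sgd, nll_sgd, logger_sgd = _build_trainer(nb_epochs=5, optimizer_cls=SGD)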
Example #7
    def test_fprop(self):
        activation = "tanh"
        seed = 1234
        repeat = 1000

        layer = LayerLSTM(input_size=DATA['features_size'],
                          hidden_size=DATA['hidden_size'],
                          activation=activation)

        layer.initialize(initer.UniformInitializer(seed))

        # input = T.tensor3('input')
        input = T.matrix('input')
        input.tag.test_value = DATA['batch_one_step']
        last_h = sharedX(DATA['state_h'])
        last_m = sharedX(DATA['state_m'])

        fprop = theano.function([input],
                                layer.fprop_faster(input, last_h, last_m))
        fprop_time = measure("h, m = fprop(DATA['batch_one_step'])", repeat)
        print("fprop time: {:.2f} sec.", fprop_time)
        h, m = fprop(DATA['batch_one_step'])
Example #8
    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        for layer in self.layers:
            layer.initialize(weights_initializer)
    def initialize(
            self, weights_initializer=initer.UniformInitializer(random_seed=1234)):
        weights_initializer(self.W)
Example #10
    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        super().initialize(weights_initializer)
        self.layer_regression.initialize(weights_initializer)
Example #11
    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        super().initialize(weights_initializer)
        self.layer_regression.initialize(weights_initializer)
        if self.learn_to_stop:
            self.layer_stopping.initialize(weights_initializer)
    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        super().initialize(weights_initializer)
        self.layer_classification.initialize(weights_initializer)
Example #13
    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        weights_initializer(self.W)