Exemple #1
0
def test_serialization_guard():
    """
    Check that Train refuses to serialize the dataset.

    The dataset is attached to the model as an attribute, so saving the
    model would serialize the dataset too; ``main_loop`` is expected to
    raise a RuntimeError.
    """
    n_features = 2
    n_examples = 11

    seeded_rng = np.random.RandomState([28, 9, 2012])
    design = seeded_rng.randn(n_examples, n_features)
    dataset = DenseDesignMatrix(X=design)

    model = DummyModel(n_features)
    # Hang the dataset off the model so that it would get serialized
    # together with the model.
    model.dataset = dataset

    Monitor.get_monitor(model)

    train = Train(dataset,
                  model,
                  DummyAlgorithm(),
                  save_path='_tmp_unit_test.pkl',
                  save_freq=1,
                  extensions=None)

    complained = False
    try:
        train.main_loop()
    except RuntimeError:
        complained = True
    # If train did not complain, this is a bug.
    assert complained
Exemple #2
0
 def channel_scaling_checker(num_examples, mode, num_batches, batch_size):
     """
     Run a monitor with a single 'mean' channel and compare the recorded
     value with the mean of the examples the monitor should have covered.

     Parameters
     ----------
     num_examples : number of examples in the dummy dataset
     mode : iteration mode forwarded to ``Monitor.add_dataset``
     num_batches : number of monitoring batches, or None
     batch_size : monitoring batch size, or None

     Raises
     ------
     AssertionError
         If the recorded channel value is not close to the expected mean.
     """
     feature_count = 2
     monitor = Monitor(DummyModel(feature_count))
     dataset = DummyDataset(num_examples, feature_count)
     monitor.add_dataset(dataset=dataset,
                         mode=mode,
                         num_batches=num_batches,
                         batch_size=batch_size)

     batch_var = T.matrix()
     mean_expr = batch_var.mean()
     specs = (monitor.model.get_input_space(),
              monitor.model.get_input_source())
     monitor.add_channel(name='mean',
                         ipt=batch_var,
                         val=mean_expr,
                         dataset=dataset,
                         data_specs=specs)
     monitor()

     assert 'mean' in monitor.channels
     channel = monitor.channels['mean']
     assert len(channel.val_record) == 1
     actual = channel.val_record[0]

     design = dataset.get_design_matrix()
     # Only when both limits are given can the monitor see fewer examples
     # than the dataset holds.
     if batch_size is None or num_batches is None:
         total = num_examples
     else:
         total = min(num_examples, num_batches * batch_size)
     expected = design[:total].mean()

     if not np.allclose(expected, actual):
         raise AssertionError("Expected monitor to contain %f but it has "
                              "%f" % (expected, actual))
Exemple #3
0
def test_serialization_guard():
    """
    Check that Train refuses to serialize the dataset.

    The dataset is stored as an attribute of the model, so saving the
    model would also serialize the dataset; ``main_loop`` is expected to
    raise a RuntimeError.
    """
    n_features = 2
    n_examples = 11

    seeded_rng = np.random.RandomState([28, 9, 2012])
    dataset = DenseDesignMatrix(X=seeded_rng.randn(n_examples, n_features))

    model = DummyModel(n_features)
    # Attaching the dataset to the model means it would get serialized
    # whenever the model is.
    model.dataset = dataset

    Monitor.get_monitor(model)

    train_kwargs = dict(save_path='_tmp_unit_test.pkl',
                        save_freq=1,
                        callbacks=None)
    train = Train(dataset, model, DummyAlgorithm(), **train_kwargs)

    raised = False
    try:
        train.main_loop()
    except RuntimeError:
        raised = True
    # If train did not complain, this is a bug.
    assert raised
Exemple #4
0
 def setup_monitor(self):
     """
     Build the symbolic minibatch and target variables, fetch the model's
     Monitor and register the validation (and, if present, test) dataset.

     Sets ``self.minibatch``, ``self.target``, ``self.monitor`` and
     ``self.log_channel_names``.
     """
     if self.topo_view:
         print("topo view")
         topo_batch = self.valid_ddm.get_batch_topo(self.batch_size)
         self.minibatch = T.as_tensor_variable(topo_batch, name='minibatch')
     else:
         print("design view")
         probe = self.valid_ddm.get_batch_design(self.batch_size)
         if not isinstance(probe, spp.csr_matrix):
             # Dense data: wrap a freshly drawn design batch.
             dense_batch = self.valid_ddm.get_batch_design(self.batch_size)
             self.minibatch = T.as_tensor_variable(dense_batch,
                                                   name='minibatch')
         else:
             # Sparse data: ask the model's input space for the variable.
             print("sparse2")
             input_space = self.model.get_input_space()
             self.minibatch = input_space.make_batch_theano()
             print(type(self.minibatch))

     self.target = T.matrix('target')

     self.monitor = Monitor.get_monitor(self.model)
     # Start the list of logged channels from a copy of the base names.
     self.log_channel_names = list(self.base_channel_names)

     datasets = [self.valid_ddm]
     if self.test_ddm is not None:
         datasets.append(self.test_ddm)
     for ddm in datasets:
         self.monitor.add_dataset(ddm, 'sequential', self.batch_size)
Exemple #5
0
def test_reject_empty():
    """
    Check that calling the Monitor raises a ValueError when it has been
    asked to iterate over 0 batches.
    """
    batch_size = 2
    n_features = 3

    model = DummyModel(n_features)
    monitor = Monitor.get_monitor(model)

    # The dataset holds one batch worth of examples, but the monitor is
    # told to use zero batches of it.
    monitoring_dataset = DummyDataset(num_examples=batch_size,
                                      num_features=n_features)
    monitor.add_dataset(monitoring_dataset,
                        'sequential',
                        batch_size=batch_size,
                        num_batches=0)

    channel_input = model.input_space.make_theano_batch()
    specs = (model.get_input_space(), model.get_input_source())
    monitor.add_channel(name='z',
                        ipt=channel_input,
                        val=0.,
                        data_specs=specs)

    rejected = False
    try:
        monitor()
    except ValueError:
        rejected = True
    assert rejected
Exemple #6
0
def test_ambig_data():
    """
    Check that the right error is raised when a channel is added to a
    monitor that has multiple datasets without specifying the dataset.

    The test passes only if ``add_channel`` raises a ValueError whose
    message equals ``_err_ambig_data``.
    """

    BATCH_SIZE = 2
    num_examples = BATCH_SIZE
    NUM_FEATURES = 3

    model = DummyModel(NUM_FEATURES)
    monitor = Monitor.get_monitor(model)

    first = DummyDataset(num_examples=num_examples,
                         num_features=NUM_FEATURES)
    second = DummyDataset(num_examples=num_examples,
                          num_features=NUM_FEATURES)

    # Two datasets are registered, so a channel that names neither of
    # them is ambiguous.
    monitor.add_dataset(first, 'sequential', batch_size=BATCH_SIZE)
    monitor.add_dataset(second, 'sequential', batch_size=BATCH_SIZE)

    name = 'num_prereq_calls'

    try:
        monitor.add_channel(name=name,
                            ipt=model.input_space.make_theano_batch(),
                            val=0.,
                            data_specs=(model.get_input_space(),
                                        model.get_input_source()))
    except ValueError as e:
        assert exc_message(e) == _err_ambig_data
        return
    # No ValueError was raised: without this the test would pass silently
    # even though the ambiguity was not detected.
    assert False
def test_prereqs_batch():
    """
    Check that prereqs get run before each monitoring batch.

    The dataset holds two batches and the prereq flips the sign of a
    shared value, so the recorded channel value is expected to be 0 after
    every monitor call.
    """
    batch_size = 2
    n_features = 3

    model = DummyModel(n_features)
    monitor = Monitor.get_monitor(model)

    two_batch_dataset = DummyDataset(num_examples=2 * batch_size,
                                     num_features=n_features)
    monitor.add_dataset(two_batch_dataset,
                        "sequential",
                        batch_size=batch_size)

    sign = sharedX(1.0)

    def prereq(X, y):
        # Negate the shared value each time the prereq runs.
        flipped = -sign.get_value()
        sign.set_value(flipped)

    name = "batches_should_cancel_to_0"
    monitor.add_channel(name=name,
                        ipt=model.input_space.make_theano_batch(),
                        val=sign,
                        prereqs=[prereq])

    channel = monitor.channels[name]
    assert len(channel.val_record) == 0

    for n_calls in (1, 2):
        monitor()
        assert channel.val_record == [0] * n_calls
Exemple #8
0
    def setup(self, model, dataset):
        """
        Configure the algorithm before training starts: remember the model,
        fetch its Monitor and, when a monitoring dataset is set, register
        that dataset and the model's monitoring channels.

        Parameters
        ----------
        model : object
            The model to train. It is asked for its monitoring data specs
            and monitoring channels.
        dataset : object
            The training dataset. This implementation does not read it.

        Raises
        ------
        TypeError
            If ``model.get_monitoring_channels`` does not return a dict.
        """
        self.model = model
        self.monitor = Monitor.get_monitor(model)

        if self.monitoring_dataset is not None:
            # Data specifications the model wants for monitoring.
            space, source = model.get_monitoring_data_specs()
            data_specs = (space, source)

            # Flatten the (possibly nested) specs into parallel tuples of
            # component spaces and source names.
            mapping = DataSpecsMapping(data_specs)
            flat_spaces = mapping.flatten(space, return_tuple=True)
            flat_sources = mapping.flatten(source, return_tuple=True)

            # One Theano variable per component, named after its source.
            flat_vars = [
                component.make_theano_batch(name='monitor_%s' % src_name)
                for (component, src_name) in safe_zip(flat_spaces,
                                                      flat_sources)
            ]
            # Restore the nesting the model's channels expect.
            nested_ipt = mapping.nest(tuple(flat_vars))

            self.monitor.add_dataset(dataset=self.monitoring_dataset,
                                     mode="sequential",
                                     batch_size=self.batch_size,
                                     num_batches=self.monitoring_batches)

            channels = model.get_monitoring_channels(nested_ipt)
            if not isinstance(channels, dict):
                raise TypeError("model.get_monitoring_channels must return a "
                                "dictionary, but it returned " + str(channels))

            for name, entry in channels.items():
                # An entry is either a bare value or a (value, prereqs) pair.
                if isinstance(entry, tuple):
                    assert len(entry) == 2
                    value, prereqs = entry
                else:
                    value, prereqs = entry, None

                self.monitor.add_channel(name=name,
                                         ipt=nested_ipt,
                                         val=value,
                                         prereqs=prereqs,
                                         data_specs=data_specs)

        self.first = True
        self.bSetup = True
Exemple #9
0
def test_reject_empty():
    """
    Check that the Monitor raises a ValueError if asked to iterate over
    0 batches.
    """
    examples_per_batch = 2
    feature_count = 3

    model = DummyModel(feature_count)
    monitor = Monitor.get_monitor(model)

    dataset_kwargs = {'num_examples': examples_per_batch,
                      'num_features': feature_count}
    monitoring_dataset = DummyDataset(**dataset_kwargs)

    # num_batches=0 is the condition the monitor is expected to reject.
    monitor.add_dataset(monitoring_dataset, 'sequential',
                        batch_size=examples_per_batch, num_batches=0)

    symbolic_input = model.input_space.make_theano_batch()
    monitor.add_channel(name='z',
                        ipt=symbolic_input,
                        val=0.,
                        data_specs=(model.get_input_space(),
                                    model.get_input_source()))

    try:
        monitor()
    except ValueError:
        pass
    else:
        assert False
Exemple #10
0
 def setup_monitor(self):
     """
     Build the symbolic minibatch and a 3-tensor target variable, fetch the
     model's Monitor and register the validation (and, if present, test)
     dataset.

     Sets ``self.minibatch``, ``self.target``, ``self.monitor`` and
     ``self.log_channel_names``.
     """
     if self.topo_view:
         print("topo view")
         raw_batch = self.valid_ddm.get_batch_topo(self.batch_size)
     else:
         print("design view")
         raw_batch = self.valid_ddm.get_batch_design(self.batch_size)
     self.minibatch = T.as_tensor_variable(raw_batch, name='minibatch')

     self.target = T.tensor3('target')

     self.monitor = Monitor.get_monitor(self.model)
     # Logged channels start out as a copy of the base channel names.
     self.log_channel_names = list(self.base_channel_names)

     self.monitor.add_dataset(self.valid_ddm, 'sequential', self.batch_size)
     if self.test_ddm is None:
         return
     self.monitor.add_dataset(self.test_ddm, 'sequential', self.batch_size)
Exemple #11
0
def test_prereqs_batch():
    """
    Check that prereqs get run before each monitoring batch.

    With two batches in the dataset and a prereq that negates a shared
    value, every monitor call is expected to record 0.
    """
    batch_size = 2
    feature_count = 3
    example_count = 2 * batch_size

    model = DummyModel(feature_count)
    monitor = Monitor.get_monitor(model)

    monitoring_dataset = DummyDataset(num_examples=example_count,
                                      num_features=feature_count)
    monitor.add_dataset(monitoring_dataset, 'sequential',
                        batch_size=batch_size)

    sign = sharedX(1.)

    def prereq(X, y):
        # Flip the sign of the shared value on every invocation.
        sign.set_value(-sign.get_value())

    name = 'batches_should_cancel_to_0'
    channel_kwargs = dict(name=name,
                          ipt=model.input_space.make_theano_batch(),
                          val=sign,
                          prereqs=[prereq])
    monitor.add_channel(**channel_kwargs)

    channel = monitor.channels[name]

    assert len(channel.val_record) == 0
    monitor()
    assert channel.val_record == [0]
    monitor()
    assert channel.val_record == [0, 0]
Exemple #12
0
def test_prereqs():
    """
    Check that prereqs get run before the monitoring channels are
    computed.

    The prereq increments a shared counter that is also the channel value,
    so the record is expected to read [1] and then [1, 2].
    """
    batch_size = 2
    feature_count = 3

    model = DummyModel(feature_count)
    monitor = Monitor.get_monitor(model)

    # Exactly one batch worth of examples.
    monitoring_dataset = DummyDataset(num_examples=batch_size,
                                      num_features=feature_count)
    monitor.add_dataset(monitoring_dataset, 'sequential',
                        batch_size=batch_size)

    prereq_counter = sharedX(0.)

    def prereq(X, y):
        bumped = prereq_counter.get_value() + 1.
        prereq_counter.set_value(bumped)

    name = 'num_prereq_calls'
    monitor.add_channel(name=name,
                        ipt=model.input_space.make_theano_batch(),
                        val=prereq_counter,
                        prereqs=[prereq])

    channel = monitor.channels[name]
    assert len(channel.val_record) == 0

    for expected_record in ([1], [1, 2]):
        monitor()
        assert channel.val_record == expected_record
Exemple #13
0
    def main_loop(self):
        """
        Train until the trainer's ``train`` call returns a false value,
        running callbacks/monitoring around every epoch and saving
        according to ``self.save_freq``.

        When ``self.algorithm`` is None the model trains itself; otherwise
        the algorithm is set up and used, and the time of each epoch is
        printed.
        """
        if self.algorithm is not None:
            self.algorithm.setup(model=self.model, dataset=self.dataset)
            self.run_callbacks_and_monitoring()
            started = datetime.datetime.now()
            while True:
                if not self.algorithm.train(dataset=self.dataset):
                    break
                finished = datetime.datetime.now()
                print(' '.join(('Time this epoch:', str(finished - started))))
                started = datetime.datetime.now()
                self.run_callbacks_and_monitoring()
                # The save_freq > 0 test comes first so the modulo is never
                # evaluated with a zero divisor.
                save_due = (self.save_freq > 0
                            and self.epochs % self.save_freq == 0)
                if save_due:
                    self.save()
                self.epochs += 1
        else:
            self.model.monitor = Monitor.get_monitor(self.model)
            self.run_callbacks_and_monitoring()
            while True:
                if not self.model.train(dataset=self.dataset):
                    break
                self.run_callbacks_and_monitoring()
                save_due = (self.save_freq > 0
                            and self.epochs % self.save_freq == 0)
                if save_due:
                    self.save()
                self.epochs += 1

        # Both branches finish the same way: one last round of callbacks
        # and monitoring, then a final save if saving is enabled.
        self.run_callbacks_and_monitoring()
        if self.save_freq > 0:
            self.save()
Exemple #14
0
def test_save_load_save():
    """
    Serialize a monitor, deserialize it, and serialize the loaded copy
    again.

    Only checks that none of these steps raises an exception; the
    serialized content itself is not inspected.
    """
    monitor = Monitor.get_monitor(DummyModel(1))

    example_count, feature_count = 2, 3
    dataset = DummyDataset(example_count, feature_count)
    monitor.add_dataset(dataset=dataset, num_batches=1, batch_size=2)

    batch_var = T.matrix()
    mean_expr = batch_var.mean()
    specs = (monitor.model.get_input_space(),
             monitor.model.get_input_source())
    monitor.add_channel(name='mean',
                        ipt=batch_var,
                        val=mean_expr,
                        dataset=dataset,
                        data_specs=specs)

    # save -> load -> save again
    first_dump = to_string(monitor)
    reloaded = from_string(first_dump)
    to_string(reloaded)
Exemple #15
0
def test_ambig_data():
    """
    Check that the right error is raised when a channel is added to a
    monitor that has multiple datasets and no dataset is specified.
    """
    batch_size = 2
    feature_count = 3

    model = DummyModel(feature_count)
    monitor = Monitor.get_monitor(model)

    # Register two datasets so that an unspecified dataset is ambiguous.
    first = DummyDataset(num_examples=batch_size,
                         num_features=feature_count)
    second = DummyDataset(num_examples=batch_size,
                          num_features=feature_count)
    monitor.add_dataset(first, 'sequential', batch_size=batch_size)
    monitor.add_dataset(second, 'sequential', batch_size=batch_size)

    try:
        monitor.add_channel(name='num_prereq_calls',
                            ipt=model.input_space.make_theano_batch(),
                            val=0.,
                            data_specs=(model.get_input_space(),
                                        model.get_input_source()))
    except ValueError as err:
        assert exc_message(err) == _err_ambig_data
    else:
        # No error at all means the ambiguity went undetected.
        assert False
Exemple #16
0
    def main_loop(self):
        """
        Run training epochs until ``train`` returns a false value, with
        callbacks/monitoring before the first epoch, after each epoch and
        once more at the end, saving according to ``self.save_freq``.

        ``self.model`` trains itself when ``self.algorithm`` is None;
        otherwise the algorithm is set up and drives training, and the
        time of each epoch is printed.
        """
        with_algorithm = self.algorithm is not None

        if with_algorithm:
            self.algorithm.setup(model=self.model, dataset=self.dataset)
        else:
            self.model.monitor = Monitor.get_monitor(self.model)
        self.run_callbacks_and_monitoring()

        if with_algorithm:
            epoch_start = datetime.datetime.now()
            while self.algorithm.train(dataset=self.dataset):
                elapsed = datetime.datetime.now() - epoch_start
                print(' '.join(('Time this epoch:', str(elapsed))))
                epoch_start = datetime.datetime.now()
                self.run_callbacks_and_monitoring()
                if self.save_freq > 0:
                    if self.epochs % self.save_freq == 0:
                        self.save()
                self.epochs += 1
        else:
            while self.model.train(dataset=self.dataset):
                self.run_callbacks_and_monitoring()
                if self.save_freq > 0:
                    if self.epochs % self.save_freq == 0:
                        self.save()
                self.epochs += 1

        # Final monitoring pass and, if saving is enabled, a final save.
        self.run_callbacks_and_monitoring()
        if self.save_freq > 0:
            self.save()
Exemple #17
0
def test_save_load_save():
    """
    Round-trip a monitor through ``to_string`` / ``from_string`` and then
    serialize the loaded copy once more.

    The test succeeds as long as no step raises; it makes no claim about
    the correctness of what was serialized.
    """
    model = DummyModel(1)
    monitor = Monitor.get_monitor(model)

    dataset = DummyDataset(2, 3)
    dataset_kwargs = dict(dataset=dataset, num_batches=1, batch_size=2)
    monitor.add_dataset(**dataset_kwargs)

    vis_batch = T.matrix()
    batch_mean = vis_batch.mean()
    input_space = monitor.model.get_input_space()
    input_source = monitor.model.get_input_source()
    monitor.add_channel(name='mean',
                        ipt=vis_batch,
                        val=batch_mean,
                        dataset=dataset,
                        data_specs=(input_space, input_source))

    # Serialize, restore, and serialize the restored monitor again.
    restored = from_string(to_string(monitor))
    to_string(restored)
Exemple #18
0
def test_prereqs():
    """
    Check that prereqs get run before the monitoring channels are
    computed.

    The channel reports a shared counter that the prereq increments, so
    the record is expected to read [1] and then [1, 2].
    """
    batch_size = 2
    feature_count = 3

    model = DummyModel(feature_count)
    monitor = Monitor.get_monitor(model)

    # A single batch worth of examples.
    monitoring_dataset = DummyDataset(num_examples=batch_size,
                                      num_features=feature_count)
    monitor.add_dataset(monitoring_dataset, 'sequential',
                        batch_size=batch_size)

    prereq_counter = sharedX(0.)

    def prereq(*data):
        # The batch data is ignored; only the call itself is counted.
        current = prereq_counter.get_value()
        prereq_counter.set_value(current + 1.)

    name = 'num_prereq_calls'
    channel_input = model.input_space.make_theano_batch()
    specs = (model.get_input_space(), model.get_input_source())
    monitor.add_channel(name=name,
                        ipt=channel_input,
                        val=prereq_counter,
                        prereqs=[prereq],
                        data_specs=specs)

    channel = monitor.channels[name]

    assert len(channel.val_record) == 0
    monitor()
    assert channel.val_record == [1]
    monitor()
    assert channel.val_record == [1, 2]
Exemple #19
0
 def prep_valtest_monitor(self, model, batch_size):
     """
     Register the validation dataset (and the test dataset, if there is
     one) with the model's Monitor and add an MSE channel for each.

     Parameters
     ----------
     model : model handed to ``MissingTargetCost`` and ``Monitor``
     batch_size : batch size used for the sample batch and for monitoring
     """
     if self.topo_view:
         print("topo view")
         sample = self.valid_ddm.get_batch_topo(batch_size)
     else:
         print("design view")
         sample = self.valid_ddm.get_batch_design(batch_size)
     minibatch = T.as_tensor_variable(sample, name='minibatch')
     target = T.matrix('target')

     mse_expr = MissingTargetCost()(model, minibatch, target)
     monitor = Monitor.get_monitor(model)

     monitor.add_dataset(self.valid_ddm, 'sequential', batch_size)
     monitor.add_channel("Validation MSE",
                         (minibatch, target),
                         mse_expr,
                         self.valid_ddm)

     if self.test_ddm is None:
         return
     # The test channel reuses the same symbolic inputs and cost.
     monitor.add_dataset(self.test_ddm, 'sequential', batch_size)
     monitor.add_channel("Test MSE",
                         (minibatch, target),
                         mse_expr,
                         self.test_ddm)
Exemple #20
0
def test_large_examples():
    """
    Run the monitor over a dummy dataset declared with a very large
    number of examples and fail if that raises a RuntimeError.
    """
    batch_size = 10000
    example_count = 60002994
    feature_count = 3

    model = DummyModel(feature_count)
    monitor = Monitor.get_monitor(model)

    big_dataset = DummyDataset(num_examples=example_count,
                               num_features=feature_count)
    monitor.add_dataset(big_dataset, 'sequential', batch_size=batch_size)

    channel_input = model.input_space.make_theano_batch()
    specs = (model.get_input_space(), model.get_input_source())
    monitor.add_channel(name='z',
                        ipt=channel_input,
                        val=0.,
                        data_specs=specs)

    try:
        monitor()
    except RuntimeError:
        # Turn a RuntimeError into a test failure.
        assert False
Exemple #21
0
 def prep_valtest_monitor(self, model, batch_size):
     """
     Register the validation dataset (and the test dataset, if there is
     one) with the model's Monitor and add classification channels.

     The validation dataset gets an accuracy channel and a
     misclassification (1 - accuracy) channel; the test dataset gets an
     accuracy channel only.

     Parameters
     ----------
     model : model handed to ``get_classification_accuracy`` and ``Monitor``
     batch_size : batch size used for the sample batch and for monitoring
     """
     sample = self.valid_ddm.get_batch_topo(batch_size)
     minibatch = T.as_tensor_variable(sample, name='minibatch')
     target = T.matrix('target')
     accuracy = self.get_classification_accuracy(model, minibatch, target)
     monitor = Monitor.get_monitor(model)

     monitor.add_dataset(self.valid_ddm, 'sequential', batch_size)
     validation_channels = (
         ("Validation Classification Accuracy", accuracy),
         ("Validation Missclassification", 1.0 - accuracy),
     )
     for channel_name, channel_value in validation_channels:
         monitor.add_channel(channel_name,
                             (minibatch, target),
                             channel_value,
                             self.valid_ddm)

     if self.test_ddm is None:
         return
     monitor.add_dataset(self.test_ddm, 'sequential', batch_size)
     monitor.add_channel("Test Classification Accuracy",
                         (minibatch, target),
                         accuracy,
                         self.test_ddm)
Exemple #22
0
    def setup_monitor(self):
        """
        Create the symbolic minibatch and target variables, fetch the
        model's Monitor and register the validation (and, if present,
        test) dataset with it.

        Sets ``self.minibatch``, ``self.target``, ``self.monitor`` and
        ``self.log_channel_names``.
        """
        size = self.batch_size

        if self.topo_view:
            print("topo view")
            self.minibatch = T.as_tensor_variable(
                self.valid_ddm.get_batch_topo(size), name='minibatch')
        else:
            print("design view")
            first_batch = self.valid_ddm.get_batch_design(size)
            is_sparse = isinstance(first_batch, spp.csr_matrix)
            if is_sparse:
                # Sparse batches get their variable from the input space.
                print("sparse2")
                space = self.model.get_input_space()
                self.minibatch = space.make_batch_theano()
                print(type(self.minibatch))
            else:
                # Dense batches are wrapped from a newly drawn batch.
                self.minibatch = T.as_tensor_variable(
                    self.valid_ddm.get_batch_design(size), name='minibatch')

        self.target = T.matrix('target')

        self.monitor = Monitor.get_monitor(self.model)
        # Logged channel names begin as a copy of the base names.
        self.log_channel_names = [name for name in self.base_channel_names]

        self.monitor.add_dataset(self.valid_ddm, 'sequential', self.batch_size)
        if self.test_ddm is not None:
            self.monitor.add_dataset(self.test_ddm, 'sequential',
                                     self.batch_size)
Exemple #23
0
    def setup(self, model, dataset):
        """
        Initialize the training algorithm. Should be called
        once before calls to train.

        Builds the symbolic cost, its gradients and the plain gradient
        descent updates, lets the model censor those updates, and compiles
        them into ``self.sgd_update``. Unnamed Theano variables created on
        the way are given descriptive names.

        Parameters
        ----------
        model : object
            Model to be trained.  Object implementing the pylearn2 Model
            interface.
        dataset : object
            Dataset on which to train.  Object implementing the
            pylearn2 Dataset interface. Not read by this method.
        """

        self.model = model

        self.monitor = Monitor.get_monitor(model)
        self.monitor.set_dataset(dataset=self.monitoring_dataset,
                                 batches=self.monitoring_batches,
                                 batch_size=self.batch_size)

        X = T.matrix(name='sgd_X')
        J = self.cost(model, X)
        if J.name is None:
            J.name = 'sgd_cost(' + X.name + ')'
        self.monitor.add_channel(name=J.name, ipt=X, val=J)
        params = model.get_params()

        # Name anonymous parameters so the derived names below are usable.
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i

        grads = dict(zip(params, T.grad(J, params)))

        for param in grads:
            if grads[param].name is None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                     {'costname': J.name,
                                      'paramname': param.name})

        learning_rate = T.scalar('sgd_learning_rate')

        updates = dict(zip(params, [param - learning_rate * grads[param]
                                    for param in params]))

        for param in updates:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'

        model.censor_updates(updates)
        for param in updates:
            # Name any update expression left unnamed after censoring.
            # The check must be on the expression's ``name``: testing the
            # expression itself against None either never fired or failed
            # with AttributeError when it did.
            if updates[param].name is None:
                updates[param].name = 'censor(sgd_update(' + param.name + '))'

        self.sgd_update = function([X, learning_rate], updates=updates,
                                   name='sgd_update')
        self.params = params
        self.bSetup = True
Exemple #24
0
    def main_loop(self):
        """
        Repeatedly runs an epoch of the training algorithm, runs any
        epoch-level callbacks, and saves the model.

        With ``self.algorithm`` set to None the model trains itself through
        ``train_all``; otherwise the algorithm is set up and its ``train``
        method is used. In both cases the loop ends when the matching
        ``continue_learning`` call returns a false value.

        Raises
        ------
        ValueError
            If ``train_all`` / ``train`` returns anything other than None.
        RuntimeError
            If, after ``algorithm.setup``, the model has no ``monitor``
            attribute.
        """
        if self.algorithm is None:
            # No algorithm: the model owns the monitor and trains itself.
            self.model.monitor = Monitor.get_monitor(self.model)
            self.setup_extensions()
            self.run_callbacks_and_monitoring()
            while True:
                rval = self.model.train_all(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("Model.train_all should not return anything. Use Model.continue_learning to control whether learning continues.")
                self.model.monitor.report_epoch()
                # NOTE(review): this branch reads `epochs_seen` while the
                # algorithm branch below reads `_epochs_seen` -- confirm
                # which attribute Monitor actually exposes.
                if self.save_freq > 0 and self.model.monitor.epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning = self.model.continue_learning()
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break
        else:
            self.algorithm.setup(model=self.model, dataset=self.dataset)
            self.setup_extensions()
            if not hasattr(self.model, 'monitor'):
                # TODO: is this really necessary? I just put this error here
                # to prevent an AttributeError later, but I think we could
                # rewrite to avoid the AttributeError
                raise RuntimeError("The algorithm is responsible for setting"
                        " up the Monitor, but failed to.")
            if len(self.model.monitor._datasets)>0:
                # This monitoring channel keeps track of a shared variable,
                # which does not need inputs nor data.
                self.model.monitor.add_channel(name="monitor_seconds_per_epoch",
                                               ipt=None,
                                               val=self.monitor_time,
                                               data_specs=(NullSpace(), ''),
                                               dataset=self.model.monitor._datasets[0])
            self.run_callbacks_and_monitoring()
            while True:
                # presumably log_timing times the enclosed block and hands
                # the elapsed time to the callbacks -- TODO confirm
                with log_timing(log, None, final_msg='Time this epoch:',
                                callbacks=[self.monitor_time.set_value]):
                    # Print the wall-clock time at which this epoch starts.
                    import time
                    print 'current time', time.strftime("%H:%M:%S")
                    rval = self.algorithm.train(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("TrainingAlgorithm.train should not return anything. Use TrainingAlgorithm.continue_learning to control whether learning continues.")
                self.model.monitor.report_epoch()
                self.run_callbacks_and_monitoring()
                if self.save_freq > 0 and self.model.monitor._epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning =  self.algorithm.continue_learning(self.model)
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break

        # Reached only when the loop ended without an exception.
        self.model.monitor.training_succeeded = True

        # Final save, so the saved model carries training_succeeded = True.
        if self.save_freq > 0:
            self.save()
Exemple #25
0
    def main_loop(self):
        """
        Repeatedly runs an epoch of the training algorithm, runs any
        epoch-level callbacks, and saves the model.

        With ``self.algorithm`` set to None the model trains itself through
        ``train_all``; otherwise the algorithm is set up and its ``train``
        method is used. In both cases the loop ends when the matching
        ``continue_learning`` call returns a false value.

        Raises
        ------
        ValueError
            If ``train_all`` / ``train`` returns anything other than None.
        RuntimeError
            If, after ``algorithm.setup``, the model has no ``monitor``
            attribute.
        """
        if self.algorithm is None:
            # No algorithm: the model owns the monitor and trains itself.
            self.model.monitor = Monitor.get_monitor(self.model)
            self.setup_extensions()
            self.run_callbacks_and_monitoring()
            while True:
                rval = self.model.train_all(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("Model.train_all should not return anything. Use Model.continue_learning to control whether learning continues.")
                self.model.monitor.report_epoch()
                # NOTE(review): this branch reads `epochs_seen` while the
                # algorithm branch below reads `_epochs_seen` -- confirm
                # which attribute Monitor actually exposes.
                if self.save_freq > 0 and self.model.monitor.epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning = self.model.continue_learning()
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break
        else:
            self.algorithm.setup(model=self.model, dataset=self.dataset)
            self.setup_extensions()
            if not hasattr(self.model, 'monitor'):
                # TODO: is this really necessary? I just put this error here
                # to prevent an AttributeError later, but I think we could
                # rewrite to avoid the AttributeError
                raise RuntimeError("The algorithm is responsible for setting"
                        " up the Monitor, but failed to.")
            if len(self.model.monitor._datasets)>0:
                # This monitoring channel keeps track of a shared variable,
                # which does not need inputs nor data.
                self.model.monitor.add_channel(name="monitor_seconds_per_epoch",
                                               ipt=None,
                                               val=self.monitor_time,
                                               data_specs=(NullSpace(), ''),
                                               dataset=self.model.monitor._datasets[0])
            self.run_callbacks_and_monitoring()
            while True:
                # presumably log_timing times the enclosed block and hands
                # the elapsed time to the callbacks -- TODO confirm
                with log_timing(log, None, final_msg='Time this epoch:',
                                callbacks=[self.monitor_time.set_value]):
                    rval = self.algorithm.train(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("TrainingAlgorithm.train should not return anything. Use TrainingAlgorithm.continue_learning to control whether learning continues.")
                self.model.monitor.report_epoch()
                self.run_callbacks_and_monitoring()
                if self.save_freq > 0 and self.model.monitor._epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning =  self.algorithm.continue_learning(self.model)
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break

        # Reached only when the loop ended without an exception.
        self.model.monitor.training_succeeded = True

        # Final save, so the saved model carries training_succeeded = True.
        if self.save_freq > 0:
            self.save()
Exemple #26
0
def test_deserialize():
    """Round-trip a Monitor through `to_string` / `from_string`."""

    # The test has no explicit assertion: it passes as long as serializing,
    # rebuilding and re-serializing the monitor does not raise.
    model = DummyModel(1)
    fresh_monitor = Monitor.get_monitor(model)

    restored_monitor = from_string(to_string(fresh_monitor))
    to_string(restored_monitor)
Exemple #27
0
def test_deserialize():
    """Check that a serialized Monitor can be rebuilt and serialized again."""

    model = DummyModel(1)

    # first pass: monitor -> string
    pickled = to_string(Monitor.get_monitor(model))

    # second pass: string -> monitor -> string; success means nothing raised
    rebuilt = from_string(pickled)
    to_string(rebuilt)
Exemple #28
0
def test_sgd_unspec_num_mon_batch():
    """
    Check that SGD monitors every example when `monitoring_batches` is None.

    The dataset has `m` rows whose single feature is the row index. A
    prereq callback ticks off each index it is handed; after one call to
    the monitor every index must have been ticked.
    """

    m = 25

    visited = [False] * m
    design = np.zeros((m, 1))
    design[:, 0] = np.arange(m)
    dataset = DenseDesignMatrix(X=design)

    model = SoftmaxModel(1)

    learning_rate = 1e-3
    batch_size = 5

    cost = DummyCost()

    # Pass the `batch_size` local instead of repeating the literal so the
    # two cannot drift apart.
    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=None,
                    monitoring_dataset=dataset,
                    termination_criterion=None,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    algorithm.setup(dataset=dataset, model=model)

    monitor = Monitor.get_monitor(model)

    symbolic_input = T.matrix()

    def tracker(*data):
        # Prereq callback: record which row indices appear in this batch.
        batch, = data
        assert batch.shape[1] == 1
        for value in batch[:, 0]:
            visited[int(value)] = True

    monitor.add_channel(name='tracker',
                        ipt=symbolic_input,
                        val=0.,
                        prereqs=[tracker],
                        data_specs=(model.get_input_space(),
                                    model.get_input_source()))

    monitor()

    # Report the missing indices in the failure message rather than printing
    # and then failing with a bare `assert False`.
    missing = [idx for idx, seen in enumerate(visited) if not seen]
    assert not missing, "examples never monitored: %s" % missing
Exemple #29
0
def test_sgd_unspec_num_mon_batch():
    """
    Check that SGD configures the monitor to cover all the data when the
    number of monitoring batches is left unspecified.

    Each row of the dataset stores its own index, so a prereq callback can
    mark which rows were presented during one monitoring pass.
    """

    m = 25

    visited = [False] * m
    design = np.zeros((m, 1))
    design[:, 0] = np.arange(m)
    dataset = DenseDesignMatrix(X=design)

    model = SoftmaxModel(1)

    learning_rate = 1e-3
    batch_size = 5

    cost = DummyCost()

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=None,
                    monitoring_dataset=dataset,
                    termination_criterion=None,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    algorithm.setup(dataset=dataset, model=model)

    monitor = Monitor.get_monitor(model)

    # Distinct names for the numeric design matrix, the symbolic input and
    # the batch seen by the callback (the original reused `X` for all three).
    symbolic_input = T.matrix()

    def tracker(*data):
        # Prereq callback: mark every row index contained in this batch.
        batch, = data
        assert batch.shape[1] == 1
        for value in batch[:, 0]:
            visited[int(value)] = True

    monitor.add_channel(name='tracker',
                        ipt=symbolic_input,
                        val=0.,
                        prereqs=[tracker],
                        data_specs=(model.get_input_space(),
                                    model.get_input_source()))

    monitor()

    # Fail with the list of unvisited indices instead of print + assert False.
    missing = [idx for idx, seen in enumerate(visited) if not seen]
    assert not missing, "examples never monitored: %s" % missing
Exemple #30
0
def test_serialize_twice():
    """Serializing the same Monitor twice must yield identical output."""

    monitor = Monitor.get_monitor(DummyModel(1))

    first_dump = to_string(monitor)
    second_dump = to_string(monitor)

    assert first_dump == second_dump
Exemple #31
0
def test_serialize_twice():
    """Two consecutive serializations of one Monitor must compare equal."""

    model = DummyModel(1)
    monitor = Monitor.get_monitor(model)

    # Evaluated left to right, i.e. in the same order as two separate calls.
    dumps = [to_string(monitor), to_string(monitor)]

    assert dumps[0] == dumps[1]
Exemple #32
0
def test_reject_bad_add_dataset():
    """`Monitor.add_dataset` must raise ValueError for one dataset given two modes."""

    monitor = Monitor.get_monitor(DummyModel(1))
    lone_dataset = DummyDataset(1, 1)

    try:
        monitor.add_dataset([lone_dataset], mode=["sequential", "shuffled"])
    except ValueError:
        # expected: the arguments were rejected
        pass
    else:
        raise AssertionError("Monitor.add_dataset accepted bad arguments to " "add_dataset.")
Exemple #33
0
    def main_loop(self):
        """
        Repeatedly runs an epoch of training, runs any epoch-level
        callbacks, and saves the model.

        Two modes are supported. When `self.algorithm` is None the model
        trains itself through `model.train_all`; otherwise the training
        algorithm is set up once and `algorithm.train` is called per epoch.
        In both modes the model is saved every `save_freq` epochs (when
        `save_freq > 0`) and once more after the loop ends.

        Raises
        ------
        ValueError
            If `train_all` / `train` returns anything other than None.
        RuntimeError
            If the algorithm's `setup` did not attach a monitor to the model.
        """
        if self.algorithm is None:
            # No external algorithm: the model drives its own training, so
            # the monitor has to be attached here.
            self.model.monitor = Monitor.get_monitor(self.model)
            self.run_callbacks_and_monitoring()
            while True:
                rval = self.model.train_all(dataset=self.dataset)
                if rval is not None:
                    raise ValueError(
                        "Model.train_all should not return anything. Use Model.continue_learning to control whether learning continues."
                    )
                self.model.monitor.report_epoch()
                # NOTE(review): this branch reads `epochs_seen` while the
                # branch below reads `_epochs_seen` -- confirm that both
                # attributes exist on Monitor.
                if self.save_freq > 0 and self.model.monitor.epochs_seen % self.save_freq == 0:
                    self.save()
                # NOTE(review): the error message above points at
                # Model.continue_learning, but this calls
                # self.continue_learning() -- confirm which is intended.
                continue_learning = self.continue_learning()
                assert continue_learning in [True, False]
                if not continue_learning:
                    break
            if self.save_freq > 0:
                self.save()
        else:
            self.algorithm.setup(model=self.model, dataset=self.dataset)
            if not hasattr(self.model, 'monitor'):
                # TODO: is this really necessary? I just put this error here
                # to prevent an AttributeError later, but I think we could
                # rewrite to avoid the AttributeError
                raise RuntimeError("The algorithm is responsible for setting"
                                   " up the Monitor, but failed to.")
            # Run callbacks/monitoring once before the first epoch.
            self.run_callbacks_and_monitoring()
            while True:
                # Wall-clock timing of a single epoch, printed to stdout.
                epoch_start = datetime.datetime.now()
                rval = self.algorithm.train(dataset=self.dataset)
                epoch_end = datetime.datetime.now()
                print 'Time this epoch:', str(epoch_end - epoch_start)
                if rval is not None:
                    raise ValueError(
                        "TrainingAlgorithm.train should not return anything. Use TrainingAlgorithm.continue_learning to control whether learning continues."
                    )
                self.model.monitor.report_epoch()
                # Unlike the branch above, callbacks and monitoring run after
                # every epoch here.
                self.run_callbacks_and_monitoring()
                if self.save_freq > 0 and self.model.monitor._epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning = self.algorithm.continue_learning(
                    self.model)
                assert continue_learning in [True, False]
                if not continue_learning:
                    break

            # Final save once training has stopped.
            if self.save_freq > 0:
                self.save()
Exemple #34
0
 def channel_scaling_checker(num_examples, mode, num_batches, batch_size):
     """
     Check that a 'mean' channel equals the mean of the monitored examples.

     Builds a monitor over one dummy dataset, records a single value of a
     channel computing the batch mean, and compares it with the mean of
     the first `total` rows of the design matrix, where `total` is capped
     by `num_batches * batch_size` when both are given. The check is
     skipped when `add_dataset` raises NotImplementedError.
     """
     feature_count = 2
     monitor = Monitor(DummyModel(feature_count))
     dataset = DummyDataset(num_examples, feature_count)
     try:
         monitor.add_dataset(dataset=dataset, mode=mode,
                             num_batches=num_batches, batch_size=batch_size)
     except NotImplementedError:
         # make sure this was due to the unimplemented batch_size case
         divisor = batch_size if num_batches is None else num_batches
         assert num_examples % divisor != 0
         raise SkipTest()
     batch_var = T.matrix()
     batch_mean = batch_var.mean()
     monitor.add_channel(name='mean', ipt=batch_var, val=batch_mean, dataset=dataset)
     monitor()
     assert 'mean' in monitor.channels
     channel = monitor.channels['mean']
     assert len(channel.val_record) == 1
     actual = channel.val_record[0]
     design = dataset.get_design_matrix()
     if batch_size is None or num_batches is None:
         total = num_examples
     else:
         total = min(num_examples, num_batches * batch_size)
     expected = design[:total].mean()
     if np.allclose(expected, actual):
         return
     raise AssertionError("Expected monitor to contain %f but it has "
                          "%f" % (expected, actual))
Exemple #35
0
 def channel_scaling_checker(num_examples, mode, num_batches, batch_size):
     """
     Check that a 'mean' channel equals the mean of the monitored examples.

     One dummy dataset is registered with the monitor, a channel computing
     the batch mean is added using the model's input space and source as
     data specs, and the single recorded value is compared with the mean of
     the first `total` rows of the design matrix.
     """
     feature_count = 2
     monitor = Monitor(DummyModel(feature_count))
     dataset = DummyDataset(num_examples, feature_count)
     monitor.add_dataset(dataset=dataset, mode=mode,
                         num_batches=num_batches, batch_size=batch_size)
     batch_var = T.matrix()
     batch_mean = batch_var.mean()
     input_specs = (monitor.model.get_input_space(),
                    monitor.model.get_input_source())
     monitor.add_channel(name='mean', ipt=batch_var, val=batch_mean,
                         dataset=dataset, data_specs=input_specs)
     monitor()
     assert 'mean' in monitor.channels
     channel = monitor.channels['mean']
     assert len(channel.val_record) == 1
     actual = channel.val_record[0]
     design = dataset.get_design_matrix()
     # Only when both limits are given can the monitor see fewer rows than
     # the dataset holds.
     if batch_size is None or num_batches is None:
         total = num_examples
     else:
         total = min(num_examples, num_batches * batch_size)
     expected = design[:total].mean()
     if np.allclose(expected, actual):
         return
     raise AssertionError("Expected monitor to contain %f but it has "
                          "%f" % (expected, actual))
Exemple #36
0
def test_reject_bad_add_dataset():
    """Passing one dataset with two modes to `add_dataset` must raise ValueError."""

    model = DummyModel(1)
    monitor = Monitor.get_monitor(model)
    only_dataset = DummyDataset(1, 1)

    rejected = False
    try:
        monitor.add_dataset([only_dataset], mode=['sequential', 'shuffled'])
    except ValueError:
        rejected = True

    if not rejected:
        raise AssertionError("Monitor.add_dataset accepted bad arguments to "
                             "add_dataset.")
Exemple #37
0
    def setup(self, model, dataset):
        """
        Configure the algorithm before training starts.

        Stores the model and its Monitor on `self`. When a monitoring
        dataset was configured, that dataset is added to the monitor and
        every channel returned by `model.get_monitoring_channels` is
        registered.

        Parameters
        ----------
        model: a Python object representing the model to train loosely
        implementing the interface of models.model.Model.

        dataset: a pylearn2.datasets.dataset.Dataset object used to draw
        training data. Not referenced by this implementation.

        Raises
        ------
        TypeError if `model.get_monitoring_channels` does not return a dict.
        """
        self.model = model
        self.monitor = Monitor.get_monitor(model)

        X = T.matrix()
        Y = T.matrix()
        monitoring_dataset = self.monitoring_dataset
        if monitoring_dataset is not None:
            if not monitoring_dataset.has_targets():
                Y = None
            self.monitor.add_dataset(dataset=monitoring_dataset,
                                     mode="sequential",
                                     batch_size=self.batch_size,
                                     num_batches=self.monitoring_batches)
            X.tag.test_value = monitoring_dataset.get_batch_design(2)
            channels = model.get_monitoring_channels(X, Y)
            if not isinstance(channels, dict):
                raise TypeError("model.get_monitoring_channels must return a "
                                "dictionary, but it returned " + str(channels))

            # The channel input is the same for every channel: features
            # only, or (features, targets) when the dataset has targets.
            ipt = X if Y is None else (X, Y)

            for name, spec in channels.items():
                # A channel is either a bare expression or an
                # (expression, prereqs) pair.
                if isinstance(spec, tuple):
                    assert len(spec) == 2
                    val, prereqs = spec
                else:
                    val, prereqs = spec, None

                self.monitor.add_channel(name=name,
                                         ipt=ipt,
                                         val=val,
                                         prereqs=prereqs)
        self.first = True
        self.bSetup = True
Exemple #38
0
    def setup(self, model, dataset):
        """
        Configure the algorithm before training starts.

        Stores the model and its Monitor on `self`. When a monitoring
        dataset was configured, it is installed on the monitor through
        `set_dataset` and every channel returned by
        `model.get_monitoring_channels` is registered.

        Parameters
        ----------
        model: a Python object representing the model to train loosely
        implementing the interface of models.model.Model.

        dataset: a pylearn2.datasets.dataset.Dataset object used to draw
        training data. Not referenced by this implementation.

        Raises
        ------
        TypeError if `model.get_monitoring_channels` does not return a dict.
        """
        self.model = model
        self.monitor = Monitor.get_monitor(model)

        X = T.matrix()
        Y = T.matrix()
        monitoring_dataset = self.monitoring_dataset
        if monitoring_dataset is not None:
            if not monitoring_dataset.has_targets():
                Y = None
            self.monitor.set_dataset(dataset=monitoring_dataset,
                                     mode="sequential",
                                     batch_size=self.batch_size,
                                     num_batches=self.monitoring_batches)
            X.tag.test_value = monitoring_dataset.get_batch_design(2)
            channels = model.get_monitoring_channels(X, Y)
            if not isinstance(channels, dict):
                raise TypeError("model.get_monitoring_channels must return a "
                                "dictionary, but it returned " + str(channels))

            # Features only, or (features, targets) when targets exist; the
            # choice does not depend on the channel.
            ipt = X if Y is None else (X, Y)

            for name, spec in channels.items():
                # Unpack the optional (expression, prereqs) pair.
                if isinstance(spec, tuple):
                    assert len(spec) == 2
                    val, prereqs = spec
                else:
                    val, prereqs = spec, None

                self.monitor.add_channel(name=name,
                                         ipt=ipt,
                                         val=val,
                                         prereqs=prereqs)
        self.first = True
        self.bSetup = True
Exemple #39
0
def test_prereqs_multidataset():
    """
    Check that each channel's prereqs run against that channel's dataset.

    Dataset `i` holds `i + 1` examples filled with the value `i` and is
    paired with a shared counter and a `ReadVerifyPrereq(i, counter)`.
    After each call to the monitor, every channel must have recorded one
    more value: 1, then 2.
    """

    NUM_DATASETS = 4
    NUM_FEATURES = 3

    model = DummyModel(NUM_FEATURES)
    monitor = Monitor.get_monitor(model)

    datasets = []
    prereq_counters = []
    for idx in xrange(NUM_DATASETS):
        # one batch per dataset: batch size equals the number of examples
        size = idx + 1
        current = DummyDataset(num_examples=size,
                               num_features=NUM_FEATURES)
        current.X[:] = idx
        datasets.append(current)

        monitor.add_dataset(current, 'sequential', batch_size=size)

        prereq_counters.append(sharedX(0.))

    channels = []
    for idx, (current, counter) in enumerate(zip(datasets, prereq_counters)):
        channel_name = str(idx)
        monitor.add_channel(name=channel_name,
                            ipt=model.input_space.make_theano_batch(),
                            val=counter,
                            dataset=current,
                            prereqs=[ReadVerifyPrereq(idx, counter)],
                            data_specs=(model.get_input_space(),
                                        model.get_input_source()))

        channels.append(monitor.channels[channel_name])

    for chan in channels:
        assert len(chan.val_record) == 0
    monitor()
    for chan in channels:
        assert chan.val_record == [1]
    monitor()
    for chan in channels:
        assert chan.val_record == [1, 2]

    # check that handling all these datasets did not
    # result in them getting serialized
    to_string(monitor)
Exemple #40
0
    def on_monitor(self, model, dataset, algorithm):
        """
        Answer at most one pending live-monitoring request.

        Polls `self.req_sock` without blocking. If a request is waiting, the
        response object it produces is filled in from the model's Monitor
        and sent back; if nothing is waiting (`zmq.Again`) nothing is sent.
        `self.counter` is incremented in either case.

        For a `ChannelsResponse`, `data` is a dict mapping each requested
        channel name to a deep copy of the channel with its records sliced
        by `start:end:step`, or to a `KeyError` for unknown names. If the
        requested channel list is not a non-empty list, `data` is a
        `TypeError` instead.

        Parameters
        ----------
        model : object
            Model whose Monitor is queried.
        dataset : object
            Not used by this method.
        algorithm : object
            Not used by this method.
        """
        monitor = Monitor.get_monitor(model)
        try:
            rsqt_msg = self.req_sock.recv_pyobj(flags=zmq.NOBLOCK)

            # Determine what type of message was received
            rsp_msg = rsqt_msg.get_response()

            if isinstance(rsp_msg, ChannelListResponse):
                rsp_msg.data = list(monitor.channels.keys())

            if isinstance(rsp_msg, ChannelsResponse):
                channel_list = rsp_msg.channel_list
                if (not isinstance(channel_list, list)
                        or len(channel_list) == 0):
                    # Previously this error was assigned and then overwritten
                    # by an empty dict, so the client never saw it.
                    rsp_msg.data = TypeError(
                        'ChannelResponse requires a list of channels.')
                else:
                    result = {}
                    for channel_name in channel_list:
                        if channel_name not in monitor.channels:
                            result[channel_name] = KeyError(
                                'Invalid channel: %s' % channel_name)
                            continue
                        chan = copy.deepcopy(monitor.channels[channel_name])
                        end = rsp_msg.end
                        if end == -1:
                            end = len(chan.batch_record)
                        window = slice(rsp_msg.start, end, rsp_msg.step)
                        # TODO copying and truncating the records
                        # individually like this is brittle. Is there a more
                        # robust solution?
                        for record in ('batch_record', 'epoch_record',
                                       'example_record', 'time_record',
                                       'val_record'):
                            setattr(chan, record,
                                    getattr(chan, record)[window])
                        result[channel_name] = chan
                    rsp_msg.data = result

            self.req_sock.send_pyobj(rsp_msg)
        except zmq.Again:
            # No request pending on the non-blocking socket.
            pass

        self.counter += 1
Exemple #41
0
def test_prereqs_multidataset():
    """
    Check that prereqs are run on the right datasets.

    Four datasets of sizes 1..4 are registered, each filled with its own
    index and watched by a channel whose prereq is
    `ReadVerifyPrereq(index, counter)`. The channel value is the counter,
    which must read 1 after the first monitor call and 2 after the second.
    """

    NUM_DATASETS = 4
    NUM_FEATURES = 3

    model = DummyModel(NUM_FEATURES)
    monitor = Monitor.get_monitor(model)

    datasets = []
    prereq_counters = []
    for idx in xrange(NUM_DATASETS):
        # batch size == number of examples, so each dataset is one batch
        size = idx + 1
        current = DummyDataset(num_examples=size,
                               num_features=NUM_FEATURES)
        current.X[:] = idx
        datasets.append(current)

        monitor.add_dataset(current, 'sequential', batch_size=size)

        prereq_counters.append(sharedX(0.))

    channels = []
    for idx, (current, counter) in enumerate(zip(datasets, prereq_counters)):
        channel_name = str(idx)
        monitor.add_channel(name=channel_name,
                            ipt=model.input_space.make_theano_batch(),
                            val=counter,
                            dataset=current,
                            prereqs=[ReadVerifyPrereq(idx, counter)],
                            data_specs=(model.get_input_space(),
                                        model.get_input_source()))

        channels.append(monitor.channels[channel_name])

    # nothing recorded before the monitor has been called
    for chan in channels:
        assert len(chan.val_record) == 0
    monitor()
    for chan in channels:
        assert chan.val_record == [1]
    monitor()
    for chan in channels:
        assert chan.val_record == [1, 2]

    # check that handling all these datasets did not
    # result in them getting serialized
    to_string(monitor)
Exemple #42
0
def test_transfer_experience():
    """Check the `transfer_experience` flag of `push_monitor`."""

    model = DummyModel(num_features=3)
    old_monitor = Monitor.get_monitor(model)

    # two batches (of 2 and 3 examples) followed by one epoch
    for batch_examples in (2, 3):
        old_monitor.report_batch(batch_examples)
    old_monitor.report_epoch()

    model = push_monitor(model, "old_monitor", transfer_experience=True)
    assert model.old_monitor is old_monitor

    new_monitor = model.monitor
    assert new_monitor.get_epochs_seen() == 1
    assert new_monitor.get_batches_seen() == 2
    # NOTE(review): this repeats the first assertion; it may have been meant
    # to check a different counter -- confirm.
    assert new_monitor.get_epochs_seen() == 1
Exemple #43
0
def test_transfer_experience():
    """Make sure `push_monitor(..., transfer_experience=True)` keeps the counts."""

    model = DummyModel(num_features=3)
    pushed = Monitor.get_monitor(model)

    # record two batches and one epoch on the monitor that will be pushed
    pushed.report_batch(2)
    pushed.report_batch(3)
    pushed.report_epoch()

    model = push_monitor(model, "old_monitor", transfer_experience=True)
    assert model.old_monitor is pushed

    replacement = model.monitor
    assert replacement.get_epochs_seen() == 1
    assert replacement.get_batches_seen() == 2
    # NOTE(review): duplicate of the epoch check above; possibly intended to
    # cover another counter -- confirm.
    assert replacement.get_epochs_seen() == 1
Exemple #44
0
    def main_loop(self):
        """
        Repeatedly runs an epoch of training, runs any epoch-level
        callbacks, and saves the model.

        When `self.algorithm` is None the model trains itself through
        `model.train_all`; otherwise the algorithm is set up once and
        `algorithm.train` is called per epoch. Extensions are set up in
        both modes before the first round of callbacks. After the loop
        ends the monitor is flagged with `training_succeeded = True` and
        the model is saved one last time when `save_freq > 0`.

        Raises
        ------
        ValueError
            If `train_all` / `train` returns anything other than None.
        RuntimeError
            If the algorithm's `setup` did not attach a monitor to the model.
        """
        if self.algorithm is None:
            # No external algorithm: attach the monitor here since nothing
            # else will.
            self.model.monitor = Monitor.get_monitor(self.model)
            self.setup_extensions()
            self.run_callbacks_and_monitoring()
            while True:
                rval = self.model.train_all(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("Model.train_all should not return anything. Use Model.continue_learning to control whether learning continues.")
                self.model.monitor.report_epoch()
                # NOTE(review): this branch reads `epochs_seen` while the
                # branch below reads `_epochs_seen` -- confirm that both
                # attributes exist on Monitor.
                if self.save_freq > 0 and self.model.monitor.epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning = self.model.continue_learning()
                # 0 / 1 are accepted alongside the booleans.
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break
        else:
            self.algorithm.setup(model=self.model, dataset=self.dataset)
            self.setup_extensions()
            if not hasattr(self.model, 'monitor'):
                # TODO: is this really necessary? I just put this error here
                # to prevent an AttributeError later, but I think we could
                # rewrite to avoid the AttributeError
                raise RuntimeError("The algorithm is responsible for setting"
                        " up the Monitor, but failed to.")
            # Run callbacks/monitoring once before the first epoch.
            self.run_callbacks_and_monitoring()
            while True:
                # Time one epoch of training and log it with the given
                # final message.
                with log_timing(log, None, final_msg='Time this epoch:'):
                    rval = self.algorithm.train(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("TrainingAlgorithm.train should not return anything. Use TrainingAlgorithm.continue_learning to control whether learning continues.")
                self.model.monitor.report_epoch()
                # Unlike the branch above, callbacks and monitoring run after
                # every epoch here.
                self.run_callbacks_and_monitoring()
                if self.save_freq > 0 and self.model.monitor._epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning =  self.algorithm.continue_learning(self.model)
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break

        # Reached only when the loop exited without an exception.
        self.model.monitor.training_succeeded = True

        if self.save_freq > 0:
            self.save()
Exemple #45
0
    def setup(self, model, dataset):
        """
        Compile the SGD update function and register the cost channel.

        Stores the model and its Monitor on `self`, points the monitor at
        `self.monitoring_dataset`, switches `dataset` to sequential
        iteration, builds the symbolic cost (summing the costs when
        `self.cost` is iterable), names every intermediate expression that
        lacks a name, lets the model censor the updates and finally
        compiles `self.sgd_update`.

        Parameters
        ----------
        model : object
            Model to train; `get_params` and `censor_updates` are called
            on it.
        dataset : object
            Training dataset; its iteration scheme is set to 'sequential'
            with `self.batch_size`.
        """
        self.model = model
        self.monitor = Monitor.get_monitor(model)
        # TODO: monitoring batch size ought to be configurable
        # separately from training batch size, e.g. if you would rather
        # monitor on one somewhat big batch but update on many small
        # batches.
        self.monitor.set_dataset(dataset=self.monitoring_dataset,
                                 batches=self.monitoring_batches,
                                 batch_size=self.batch_size)
        dataset.set_iteration_scheme('sequential', batch_size=self.batch_size)
        X = T.matrix(name="%s[X]" % self.__class__.__name__)

        # `self.cost` may be a single callable or an iterable of callables.
        try:
            iter(self.cost)
            iterable_cost = True
        except TypeError:
            iterable_cost = False
        if iterable_cost:
            cost_value = sum(c(model, X) for c in self.cost)
        else:
            cost_value = self.cost(model, X)
        if cost_value.name is None:
            cost_value.name = 'sgd_cost(' + X.name + ')'
        self.monitor.add_channel(name=cost_value.name, ipt=X, val=cost_value)

        params = model.get_params()
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i

        grads = dict(zip(params, T.grad(cost_value, params)))
        for param in grads:
            if grads[param].name is None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                     {'costname': cost_value.name,
                                      'paramname': param.name})

        learning_rate = T.scalar('sgd_learning_rate')
        updates = dict(zip(params, [param - learning_rate * grads[param]
                                    for param in params]))
        for param in updates:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'

        model.censor_updates(updates)
        for param in updates:
            # Name only the expressions that censoring left unnamed. The
            # original tested `updates[param] is None`, which could never
            # name anything and would fail on `.name` if it were true.
            if updates[param].name is None:
                updates[param].name = 'censor(sgd_update(' + param.name + '))'

        self.sgd_update = function([X, learning_rate], updates=updates,
                                   name='sgd_update')
        self.params = params
Exemple #46
0
    def setup(self):
        """
        Prepare everything the main loop needs.

        The main loop calls this itself at start-up, so an explicit call is
        only required from a driver script that replaces the main loop.
        """
        model = self.model
        model.monitor = Monitor.get_monitor(model)
        model.monitor.time_budget_exceeded = False

        algorithm = self.algorithm
        if algorithm is not None:
            algorithm.setup(model=model, dataset=self.dataset)
        self.setup_extensions()

        # The training algorithm relies on Model.censor_updates to enforce
        # constraints after each learning step; enforcing them here makes
        # sure they already hold before the first step.
        model.enforce_constraints()
Exemple #47
0
    def setup(self, model, dataset, algorithm):
        """
        Start the momentum schedule from the monitor's epoch count.

        Parameters
        ----------
        model : pylearn2.models.Model
            The model to which the training algorithm is applied.
        dataset : pylearn2.datasets.Dataset
            The dataset to which the model is applied. Not referenced here.
        algorithm : pylearn2.training_algorithms.TrainingAlgorithm
            The algorithm handed on to `_apply_momentum`.
        """
        # Take the starting count from the monitor's recorded epochs.
        self._count = Monitor.get_monitor(model).get_epochs_seen()
        self._apply_momentum(algorithm)
Exemple #48
0
def test_extra_costs():
    """Make sure `Monitor.setup` checks the type of `extra_costs`."""

    num_features = 3
    model = DummyModel(num_features=num_features)
    dataset = DummyDataset(num_examples=2, num_features=num_features)
    monitor = Monitor.get_monitor(model)

    # a plain list must be rejected with an AssertionError
    as_list = [model.get_default_cost()]
    assert_raises(AssertionError, monitor.setup, dataset,
                  model.get_default_cost(), 1, extra_costs=as_list)

    # an ordered mapping of name -> cost must go through without raising
    as_mapping = OrderedDict([('Cost', model.get_default_cost())])
    monitor.setup(dataset, model.get_default_cost(), 1,
                  extra_costs=as_mapping)
Exemple #49
0
    def setup(self, model, dataset, algorithm):
        """
        Start the decay schedule from the monitor's epoch count.

        Parameters
        ----------
        model : pylearn2.models.Model
            The model to which the training algorithm is applied.
        dataset : pylearn2.datasets.Dataset
            The dataset to which the model is applied. Not referenced here.
        algorithm : pylearn2.training_algorithms.TrainingAlgorithm
            The algorithm handed on to `_apply_lambda_teach`.
        """
        # Take the starting count from the monitor's recorded epochs.
        self._count = Monitor.get_monitor(model).get_epochs_seen()
        self._apply_lambda_teach(algorithm)
Exemple #50
0
def test_extra_costs():
    """`Monitor.setup` must reject a list and accept an OrderedDict of extra costs."""

    num_features = 3
    model = DummyModel(num_features=num_features)
    dataset = DummyDataset(num_examples=2, num_features=num_features)
    monitor = Monitor.get_monitor(model)

    # wrong container type -> AssertionError expected
    rejected_costs = [model.get_default_cost()]
    assert_raises(AssertionError, monitor.setup, dataset,
                  model.get_default_cost(), 1, extra_costs=rejected_costs)

    # right container type -> the call must simply succeed
    accepted_costs = OrderedDict([('Cost', model.get_default_cost())])
    monitor.setup(dataset, model.get_default_cost(), 1,
                  extra_costs=accepted_costs)
def test_no_data():
    """
    Check that adding a channel to a monitor without datasets fails.

    `Monitor.add_channel` must raise a ValueError whose message equals
    `_err_no_data`. The test fails if no error is raised at all.
    """

    NUM_FEATURES = 3

    model = DummyModel(NUM_FEATURES)
    monitor = Monitor.get_monitor(model)

    name = "num_prereq_calls"

    try:
        monitor.add_channel(name=name, ipt=model.input_space.make_theano_batch(), val=0.0)
    except ValueError as e:
        # `str(e)` replaces the Python-2-only `e.message`.
        assert str(e) == _err_no_data
        return

    # Without this the test would pass silently when nothing is raised.
    raise AssertionError("Monitor.add_channel accepted a channel although "
                         "the monitor has no datasets.")
Exemple #52
0
def test_valid_after_serialize():
    """Serializing a Monitor must leave it usable: `redo_theano` still runs."""

    BATCH_SIZE = 2
    NUM_FEATURES = 3
    # two full batches worth of examples
    example_count = 2 * BATCH_SIZE

    monitor = Monitor.get_monitor(DummyModel(NUM_FEATURES))

    watched = DummyDataset(num_examples=example_count, num_features=NUM_FEATURES)
    watched.yaml_src = ""
    monitor.add_dataset(watched, "sequential", batch_size=BATCH_SIZE)

    to_string(monitor)

    # would raise if serialization had damaged the monitor's state
    monitor.redo_theano()
Exemple #53
0
def test_valid_after_serialize():
    """Test that serializing the monitor does not ruin it."""

    BATCH_SIZE = 2
    NUM_FEATURES = 3
    # two full batches worth of examples
    example_count = 2 * BATCH_SIZE

    model = DummyModel(NUM_FEATURES)
    monitor = Monitor.get_monitor(model)

    watched = DummyDataset(num_examples=example_count,
                           num_features=NUM_FEATURES)
    watched.yaml_src = ""
    monitor.add_dataset(watched, 'sequential', batch_size=BATCH_SIZE)

    # serialize, then make sure the monitor can still rebuild its functions
    to_string(monitor)
    monitor.redo_theano()
Exemple #54
0
def test_no_data():
    """Check that adding a channel to a monitor without datasets fails.

    The monitor created here never receives a dataset, so
    ``add_channel`` is expected to raise a ``ValueError`` whose text
    equals ``_err_no_data``.

    Raises
    ------
    AssertionError
        If the error text differs from ``_err_no_data``, or if
        ``add_channel`` does not raise ``ValueError`` at all.
    """
    NUM_FEATURES = 3

    model = DummyModel(NUM_FEATURES)
    monitor = Monitor.get_monitor(model)

    name = 'num_prereq_calls'

    try:
        monitor.add_channel(name=name,
                            ipt=model.input_space.make_theano_batch(),
                            val=0.)
    except ValueError as e:
        # str(e) instead of e.message: the attribute has been deprecated
        # since Python 2.6 and does not exist on Python 3 exceptions.
        assert str(e) == _err_no_data
        return

    # No ValueError was raised: fail explicitly instead of letting the
    # test pass without having verified anything.
    raise AssertionError("add_channel did not raise ValueError for a "
                         "monitor that has no datasets")
Exemple #55
0
    def prep_valtest_monitor(self, model, batch_size):
        """Register validation (and optionally test) MSE channels.

        Parameters
        ----------
        model : object
            Model handed to ``MissingTargetCost`` and used to look up the
            monitor via ``Monitor.get_monitor``.
        batch_size : int
            Passed to ``get_batch_topo`` / ``get_batch_design`` and to
            ``monitor.add_dataset``.

        Notes
        -----
        The "Test MSE" channel is only added when ``self.test_ddm`` is not
        ``None``. Both channels use the same cost expression, which is
        built from a batch drawn from ``self.valid_ddm``.
        """
        # print(...) with a single argument behaves the same under the
        # Python 2 print statement and the Python 3 print function.
        if self.topo_view:
            print("topo view")
            batch = self.valid_ddm.get_batch_topo(batch_size)
        else:
            print("design view")
            batch = self.valid_ddm.get_batch_design(batch_size)
        minibatch = T.as_tensor_variable(batch, name='minibatch')

        target = T.matrix('target')
        valMSE = MissingTargetCost()(model, minibatch, target)
        monitor = Monitor.get_monitor(model)

        monitor.add_dataset(self.valid_ddm, 'sequential', batch_size)
        monitor.add_channel("Validation MSE", (minibatch, target), valMSE,
                            self.valid_ddm)

        if self.test_ddm is not None:
            monitor.add_dataset(self.test_ddm, 'sequential', batch_size)
            monitor.add_channel("Test MSE", (minibatch, target), valMSE,
                                self.test_ddm)
Exemple #56
0
    def setup_monitor(self):
        """Create the monitor for ``self.model`` and attach its datasets.

        Sets the following attributes:

        * ``self.minibatch`` -- tensor variable built from a batch of
          ``self.valid_ddm`` (topological or design view, depending on
          ``self.topo_view``).
        * ``self.target`` -- a ``T.tensor3`` variable named ``'target'``.
        * ``self.monitor`` -- the monitor returned by
          ``Monitor.get_monitor(self.model)``.
        * ``self.log_channel_names`` -- a new list holding the entries of
          ``self.base_channel_names``.

        ``self.valid_ddm`` is always added to the monitor;
        ``self.test_ddm`` is added only when it is not ``None``.
        """
        # print(...) with a single argument behaves the same under the
        # Python 2 print statement and the Python 3 print function.
        if self.topo_view:
            print("topo view")
            batch = self.valid_ddm.get_batch_topo(self.batch_size)
        else:
            print("design view")
            batch = self.valid_ddm.get_batch_design(self.batch_size)
        self.minibatch = T.as_tensor_variable(batch, name='minibatch')

        self.target = T.tensor3('target')

        self.monitor = Monitor.get_monitor(self.model)
        # Copy, so later additions do not mutate base_channel_names.
        self.log_channel_names = list(self.base_channel_names)

        self.monitor.add_dataset(self.valid_ddm, 'sequential', self.batch_size)
        if self.test_ddm is not None:
            self.monitor.add_dataset(self.test_ddm, 'sequential',
                                     self.batch_size)
Exemple #57
0
    def on_monitor(self, model, dataset, algorithm):
        """Update the variable ``self.var`` held by the dataset transformer.

        On the first call a monitor channel named ``self.channel_name`` is
        registered for the variable, its starting value is stored in
        ``self._init_val`` and, when ``self.bezier`` is set, a table of
        values sampled from a Bezier curve is stored in ``self.bezier_ys``.
        On every call ``self._count`` is incremented and the variable is set
        to ``self.current_val()`` cast to ``config.floatX``.

        Parameters
        ----------
        model : object
            Model whose monitor receives the channel and whose input space
            provides the channel's input batch.
        dataset : object
            Dataset whose ``transformer`` attribute is passed to
            ``self._get_noiseClass``; also the dataset the channel is
            attached to.
        algorithm : object
            Not used in the lines shown here.
        """
        # Object whose __dict__ holds the variable being adjusted.
        noiseClass = self._get_noiseClass(dataset.transformer)
        
        if not self._initialized:
            # One-time setup: expose the variable as a monitor channel.
            monitor = Monitor.get_monitor(model)
            monitor.add_channel(name=self.channel_name,
                                ipt=model.get_input_space().make_theano_batch(),
                                val=noiseClass.__dict__[self.var],
                                dataset=dataset)

            # Remember the starting value; the Bezier table below is scaled
            # by it.
            self._init_val = noiseClass.__dict__[self.var].get_value()
            self._initialized = True

            if self.bezier:
                # Control points handed to bezier_curve.
                if self.bezier1:
                    points = np.array([(0,0), (1.,0.),(1.,0.),(1,1)])
                else:
                    points = np.array([(0,0), (.45,.13),(.62,.64),(1,1)])
                # Second argument is bezier_count * 100 -- presumably the
                # number of curve samples; confirm against bezier_curve.
                xvals, yvals = bezier_curve(points,self.bezier_count*100)
                # Rescale x by bezier_count and y by the initial value.
                xvals = xvals*self.bezier_count
                yvals = yvals*self._init_val

                xis = []
                # searchsorted requires ascending input, so the reversed
                # x values are assumed to be ascending -- TODO confirm the
                # ordering returned by bezier_curve.
                r_xvals = np.array(list(reversed(xvals)))

                # For each integer i in [0, bezier_count), record the
                # insertion index of i in the reversed x values.
                for i in range(self.bezier_count):
                    xis.append(r_xvals.searchsorted(i)) 
                # Finish the index list with the last sample.
                xis.append(yvals.shape[0]-1)
                                                    
                xvals = np.array(list(reversed(xvals)))
                yvals = np.array(list(reversed(yvals)))
                                                                
                # NOTE(review): xs is assigned but not read in the lines
                # shown here.
                xs = xvals[xis]
                # Selected y values, stored in reverse of selection order.
                self.bezier_ys = list(reversed(yvals[xis]))

        self._count += 1

        # Write the current scheduled value back into the variable, cast to
        # the configured float type.
        noiseClass.__dict__[self.var].set_value(np.cast[config.floatX](self.current_val()))